% This data is distributed under the terms of the Open Data Commons Attribution License (ODC-By) v1.0 - See more at: http://opendatacommons.org/licenses/by/1-0/
%
% Conventions used below:
%   - one field per line; author names in "Last, First" form;
%   - accented letters are written as brace-wrapped LaTeX escapes (e.g. {\'e})
%     so that classic BibTeX treats each as a single letter when sorting;
%   - words in titles that must keep their capitalisation are brace-protected.

% All issues of Volume 3

% Volume 3, Issue 1, 2016

@Article{OJSW_2016v3i1n01_Peixoto,
  title     = {Hierarchical Multi-Label Classification Using Web Reasoning for Large Datasets},
  author    = {Peixoto, Rafael and Hassan, Thomas and Cruz, Christophe and Bertaux, Aur{\'e}lie and Silva, Nuno},
  journal   = {Open Journal of Semantic Web (OJSW)},
  issn      = {2199-336X},
  year      = {2016},
  volume    = {3},
  number    = {1},
  pages     = {1--15},
  url       = {http://nbn-resolving.de/urn:nbn:de:101:1-201705194907},
  urn       = {urn:nbn:de:101:1-201705194907},
  publisher = {RonPub},
  bibsource = {RonPub},
  abstract  = {Extracting valuable data among large volumes of data is one of the main challenges in Big Data. In this paper, a Hierarchical Multi-Label Classification process called Semantic HMC is presented. This process aims to extract valuable data from very large data sources, by automatically learning a label hierarchy and classifying data items. The Semantic HMC process is composed of five scalable steps, namely Indexation, Vectorization, Hierarchization, Resolution and Realization. The first three steps construct automatically a label hierarchy from statistical analysis of data. This paper focuses on the last two steps which perform item classification according to the label hierarchy. The process is implemented as a scalable and distributed application, and deployed on a Big Data platform. A quality evaluation is described, which compares the approach with multi-label classification algorithms from the state of the art dedicated to the same goal. The Semantic HMC approach outperforms state of the art approaches in some areas.}
}

@Article{OJSW_2016v3i1n02_Tatu,
  title     = {A Semantic Question Answering Framework for Large Data Sets},
  author    = {Tatu, Marta and Balakrishna, Mithun and Werner, Steven and Erekhinskaya, Tatiana and Moldovan, Dan},
  journal   = {Open Journal of Semantic Web (OJSW)},
  issn      = {2199-336X},
  year      = {2016},
  volume    = {3},
  number    = {1},
  pages     = {16--31},
  url       = {http://nbn-resolving.de/urn:nbn:de:101:1-201705194921},
  urn       = {urn:nbn:de:101:1-201705194921},
  publisher = {RonPub},
  bibsource = {RonPub},
  abstract  = {Traditionally, the task of answering natural language questions has involved a keyword-based document retrieval step, followed by in-depth processing of candidate answer documents and paragraphs. This post-processing uses semantics to various degrees. In this article, we describe a purely semantic question answering (QA) framework for large document collections. Our high-precision approach transforms the semantic knowledge extracted from natural language texts into a language-agnostic RDF representation and indexes it into a scalable triplestore. In order to facilitate easy access to the information stored in the RDF semantic index, a user's natural language questions are translated into SPARQL queries that return precise answers back to the user. The robustness of this framework is ensured by the natural language reasoning performed on the RDF store, by the query relaxation procedures, and the answer ranking techniques. The improvements in performance over a regular free text search index-based question answering engine prove that QA systems can benefit greatly from the addition and consumption of deep semantic information.}
}

@Article{OJSW_2016v3i1n03_Smid,
  title     = {{OnGIS}: Semantic Query Broker for Heterogeneous Geospatial Data Sources},
  author    = {Smid, Marek and Kremen, Petr},
  journal   = {Open Journal of Semantic Web (OJSW)},
  issn      = {2199-336X},
  year      = {2016},
  volume    = {3},
  number    = {1},
  pages     = {32--50},
  url       = {http://nbn-resolving.de/urn:nbn:de:101:1-201705194936},
  urn       = {urn:nbn:de:101:1-201705194936},
  publisher = {RonPub},
  bibsource = {RonPub},
  abstract  = {Querying geospatial data from multiple heterogeneous sources backed by different management technologies poses an interesting problem in the data integration and in the subsequent result interpretation. This paper proposes broker techniques for answering a user's complex spatial query: finding relevant data sources (from a catalogue of data sources) capable of answering the query, eventually splitting the query and finding relevant data sources for the query parts, when no single source suffices. For the purpose, we describe each source with a set of prototypical queries that are algorithmically arranged into a lattice, which makes searching efficient. The proposed algorithms leverage GeoSPARQL query containment enhanced with OWL 2 QL semantics. A prototype is implemented in a system called OnGIS.}
}