% This data is distributed under the terms of the Open Data Commons Attribution License (ODC-By) v1.0 - See more at: http://opendatacommons.org/licenses/by/1-0/

% Journal article record: Open Journal of Big Data (OJBD), volume 1, number 2 (2015), pages 34--46.
% The entry starts on its own line so that tools which treat a percent sign as a
% comment-to-end-of-line marker cannot swallow it together with the license header.
% "Hadoop" is brace-protected in the title so sentence-casing styles keep its capital.
% Author names use the unambiguous "Last, First" form.
@Article{OJBD_2015v1i2n04_UjjalMarjit,
  title     = {Data Transfers in {Hadoop}: A Comparative Study},
  author    = {Marjit, Ujjal and Sharma, Kumar and Mandal, Puspendu},
  journal   = {Open Journal of Big Data (OJBD)},
  issn      = {2365-029X},
  year      = {2015},
  volume    = {1},
  number    = {2},
  pages     = {34--46},
  url       = {http://nbn-resolving.de/urn:nbn:de:101:1-201705194373},
  urn       = {urn:nbn:de:101:1-201705194373},
  publisher = {RonPub},
  bibsource = {RonPub},
  abstract  = {Hadoop is an open source framework for processing large amounts of data in distributed computing environment. It plays an important role in processing and analyzing the Big Data. This framework is used for storing data on large clusters of commodity hardware. Data input and output to and from Hadoop is an indispensable action for any data processing job. At present, many tools have been evolved for importing and exporting Data in Hadoop. In this article, some commonly used tools for importing and exporting data have been emphasized. Moreover, a state-of-the-art comparative study among the various tools has been made. With this study, it has been decided that where to use one tool over the other with emphasis on the data transfer to and from Hadoop system. This article also discusses about how Hadoop handles backup and disaster recovery along with some open research questions in terms of Big Data transfer when dealing with cloud-based services.},
}