Thrill  0.1
file_io.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * thrill/vfs/file_io.hpp
3  *
4  * Abstract interfaces of virtual file system (VFS) layer
5  *
6  * Part of Project Thrill - http://project-thrill.org
7  *
8  * Copyright (C) 2015 Alexander Noe <[email protected]>
9  * Copyright (C) 2015-2016 Timo Bingmann <[email protected]>
10  *
11  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
12  ******************************************************************************/
13 
14 #pragma once
15 #ifndef THRILL_VFS_FILE_IO_HEADER
16 #define THRILL_VFS_FILE_IO_HEADER
17 
18 #include <thrill/common/math.hpp>
21 #include <tlx/counting_ptr.hpp>
22 
23 #include <string>
24 #include <vector>
25 
26 namespace thrill {
27 namespace vfs {
28 
29 /******************************************************************************/
30 
//! Initialize the VFS layer. Only declared in this header.
void Initialize();

//! Deinitialize the VFS layer. Only declared in this header.
void Deinitialize();
36 
37 /******************************************************************************/
38 
//! Function which takes pathbase and replaces $$$ with the worker value and
//! ### with the file_part value.
//!
//! \param pathbase   path pattern containing the $$$ / ### placeholders
//! \param worker     value substituted for $$$
//! \param file_part  value substituted for ###
//! \return the path with the placeholders replaced
std::string FillFilePattern(const std::string& pathbase,
                            size_t worker, size_t file_part);
43 
//! Returns true if the file at path is compressed (e.g., its name ends with
//! '.{gz,bz2,xz,lzo}').
bool IsCompressed(const std::string& path);

//! Returns true if the file at path is a remote uri like s3:// or hdfs://
bool IsRemoteUri(const std::string& path);
50 
//! VFS object type: an entry is either a File or a Directory.
enum class Type { File, Directory };

//! Stream output operator for a VFS object Type. Only declared in this header.
std::ostream& operator << (std::ostream& os, const Type& t);
55 
56 //! General information of vfs file.
57 struct FileInfo {
58  //! type of entry
60  //! path to file
62  //! size of file.
63  uint64_t size;
64  //! exclusive prefix sum of file sizes.
65  uint64_t size_ex_psum;
66 
67  //! inclusive prefix sum of file sizes.
68  uint64_t size_inc_psum() const { return size_ex_psum + size; }
69  //! if the file is compressed
70  bool IsCompressed() const { return vfs::IsCompressed(path); }
71  //! if the file is at remote uri
72  bool IsRemoteUri() const { return vfs::IsRemoteUri(path); }
73 
74  //! compare FileInfo by path
75  bool operator < (const FileInfo& b) const { return path < b.path; }
76 };
77 
78 //! List of file info and additional overall info.
79 struct FileList : public std::vector<FileInfo> {
80  //! total size of files
81  uint64_t total_size;
82 
83  //! whether the list contains a compressed file.
85 
86  //! whether the list contains a remote-uri file.
88 
89  //! inclusive prefix sum of file sizes (only for symmetry with ex_psum)
90  uint64_t size_inc_psum(size_t i) const
91  { return operator [] (i).size_inc_psum(); }
92 
93  //! exclusive prefix sum of file sizes with total_size as sentinel
94  uint64_t size_ex_psum(size_t i) const
95  { return i < size() ? operator [] (i).size_ex_psum : total_size; }
96 };
97 
//! Type of objects to include in glob result: everything, only files, or only
//! directories.
enum class GlobType { All, File, Directory };
100 
101 /*!
102  * Reads a glob path list and deliver a file list, sizes, and prefixsums (in
103  * bytes) for all matching files.
104  */
105 FileList Glob(const std::string& glob, const GlobType& gtype = GlobType::All);
106 
107 /*!
108  * Reads a glob path list and deliver a file list, sizes, and prefixsums (in
109  * bytes) for all matching files.
110  */
111 FileList Glob(const std::vector<std::string>& globlist,
112  const GlobType& gtype = GlobType::All);
113 
114 /******************************************************************************/
115 
116 /*!
117  * Reader object from any source. Streams can be created for any supported URI
118  * and seek to the given range's offset.
119  */
120 class ReadStream : public virtual tlx::ReferenceCounter
121 {
122 public:
123  virtual ~ReadStream();
124 
125  //! read up to size bytes from stream.
126  virtual ssize_t read(void* data, size_t size) = 0;
127 
128  //! close stream, release resources.
129  virtual void close() = 0;
130 };
131 
132 /*!
133  * Writer object to output data to any supported URI.
134  */
135 class WriteStream : public virtual tlx::ReferenceCounter
136 {
137 public:
138  virtual ~WriteStream();
139 
140  virtual ssize_t write(const void* data, size_t size) = 0;
141 
142  virtual void close() = 0;
143 };
144 
147 
148 /******************************************************************************/
149 
150 /*!
151  * Construct reader for given path uri. Range is the byte range [b,e) inside the
152  * file to read. If e = 0, the complete file is read.
153  *
154  * For the POSIX SysFile implementation the range is used only to seek to the
155  * byte offset b. It allows additional bytes after e to be read.
156  *
157  * For the S3File implementations, however, the range[b,e) is used to determine
158  * which data to fetch from S3. Hence, once e is reached, read() will return
159  * EOF.
160  */
162  const std::string& path, const common::Range& range = common::Range());
163 
165 
166 /******************************************************************************/
167 
168 } // namespace vfs
169 } // namespace thrill
170 
171 #endif // !THRILL_VFS_FILE_IO_HEADER
172 
173 /******************************************************************************/
bool IsCompressed() const
if the file is compressed
Definition: file_io.hpp:70
Type type
type of entry
Definition: file_io.hpp:59
Writer object to output data to any supported URI.
Definition: file_io.hpp:135
Reader object from any source.
Definition: file_io.hpp:120
FileList Glob(const std::vector< std::string > &globlist, const GlobType &gtype)
Reads a glob path list and delivers a file list, sizes, and prefix sums (in bytes) for all matching files.
Definition: file_io.cpp:128
uint64_t total_size
total size of files
Definition: file_io.hpp:81
void Initialize()
Initialize VFS layer.
Definition: file_io.cpp:35
GlobType
Type of objects to include in glob result.
Definition: file_io.hpp:99
std::string FillFilePattern(const std::string &pathbase, size_t worker, size_t file_part)
Definition: file_io.cpp:71
Type
VFS object type.
Definition: file_io.hpp:52
ReadStreamPtr OpenReadStream(const std::string &path, const common::Range &range)
Construct reader for given path uri.
Definition: file_io.cpp:180
represents a 1 dimensional range (interval) [begin,end)
Definition: math.hpp:41
bool contains_remote_uri
whether the list contains a remote-uri file.
Definition: file_io.hpp:87
General information of vfs file.
Definition: file_io.hpp:57
bool IsRemoteUri() const
if the file is at remote uri
Definition: file_io.hpp:72
uint64_t size
size of file.
Definition: file_io.hpp:63
uint64_t size_inc_psum(size_t i) const
inclusive prefix sum of file sizes (only for symmetry with ex_psum)
Definition: file_io.hpp:90
void Deinitialize()
Deinitialize VFS layer.
Definition: file_io.cpp:40
std::string path
path to file
Definition: file_io.hpp:61
uint64_t size_ex_psum(size_t i) const
exclusive prefix sum of file sizes with total_size as sentinel
Definition: file_io.hpp:94
std::basic_string< char, std::char_traits< char >, Allocator< char > > string
string with Manager tracking
Definition: allocator.hpp:220
std::vector< T, Allocator< T > > vector
vector with Manager tracking
Definition: allocator.hpp:228
bool IsCompressed(const std::string &path)
Definition: file_io.cpp:47
High-performance smart pointer used as a wrapping reference counting pointer.
List of file info and additional overall info.
Definition: file_io.hpp:79
bool IsRemoteUri(const std::string &path)
Returns true, if file at filepath is a remote uri like s3:// or hdfs://.
Definition: file_io.cpp:55
std::ostream & operator<<(std::ostream &os, const Type &t)
Definition: file_io.cpp:60
bool operator<(const uint_pair &b) const
less-than comparison operator
Definition: uint_types.hpp:187
bool contains_compressed
whether the list contains a compressed file.
Definition: file_io.hpp:84
uint64_t size_inc_psum() const
inclusive prefix sum of file sizes.
Definition: file_io.hpp:68
WriteStreamPtr OpenWriteStream(const std::string &path)
Definition: file_io.cpp:211
uint64_t size_ex_psum
exclusive prefix sum of file sizes.
Definition: file_io.hpp:65
Provides reference counting abilities for use with CountingPtr.