65 return os <<
"Directory";
67 return os <<
"Invalid";
72 size_t worker,
size_t file_part) {
74 static constexpr
bool debug =
false;
76 using size_type = std::string::size_type;
83 size_type slash_end = out_path.rfind(
'/');
84 size_type dot_end = out_path.rfind(
'.');
85 if (dot_end != std::string::npos &&
87 (slash_end == std::string::npos || slash_end < dot_end)) {
88 extension = out_path.substr(dot_end);
89 out_path.erase(dot_end);
94 size_type at_end = out_path.rfind(
'@');
95 size_type at_begin = out_path.find_last_not_of(
'@', at_end);
98 at_end != std::string::npos && at_end > at_begin
99 ? at_end - at_begin : 4;
101 sLOG <<
"at_length" << at_length;
102 out_path.replace(at_begin + 1, at_length,
104 static_cast<int>(at_length),
109 size_type hash_end = out_path.rfind(
'#');
110 size_type hash_begin = out_path.find_last_not_of(
'#', hash_end);
112 size_type hash_length =
113 hash_end != std::string::npos && hash_end > hash_begin
114 ? hash_end - hash_begin : 10;
116 sLOG <<
"hash_length" << hash_length;
117 out_path.replace(hash_begin + 1, hash_length,
119 static_cast<int>(hash_length),
122 out_path += extension;
138 SysGlob(path.substr(7), gtype, filelist);
141 S3Glob(path, gtype, filelist);
147 SysGlob(path, gtype, filelist);
156 uint64_t size_ex_psum = 0;
160 uint64_t size_next = size_ex_psum + fi.size;
161 fi.size_ex_psum = size_ex_psum;
162 size_ex_psum = size_next;
164 filelist.contains_compressed |= fi.IsCompressed();
165 filelist.contains_remote_uri |= fi.IsRemoteUri();
166 filelist.total_size += fi.size;
173 return Glob(std::vector<std::string>{ glob }, gtype);
#define sLOG
Default logging method: output if the local debug variable is true.
FileList Glob(const std::vector< std::string > &globlist, const GlobType &gtype)
Reads a glob path list and delivers a file list, sizes, and prefix sums (in bytes) for all matching fil...
uint64_t total_size
total size of files
WriteStreamPtr Hdfs3OpenWriteStream(const std::string &)
void Initialize()
Initialize VFS layer.
GlobType
Type of objects to include in glob result.
std::string FillFilePattern(const std::string &pathbase, size_t worker, size_t file_part)
bool starts_with(const char *str, const char *match)
Checks if the given match string is located at the start of this string.
ReadStreamPtr OpenReadStream(const std::string &path, const common::Range &range)
Construct reader for given path uri.
represents a 1 dimensional range (interval) [begin,end)
bool contains_remote_uri
whether the list contains a remote-uri file.
General information of vfs file.
ReadStreamPtr MakeBZip2ReadFilter(const ReadStreamPtr &)
void S3Glob(const std::string &, const GlobType &, FileList &)
ReadStreamPtr S3OpenReadStream(const std::string &, const common::Range &)
bool ends_with(const char *str, const char *match)
Checks if the given match string is located at the end of this string.
void Deinitialize()
Deinitialize VFS layer.
WriteStreamPtr SysOpenWriteStream(const std::string &path)
Open file for writing and return file descriptor.
ReadStreamPtr MakeGZipReadFilter(const ReadStreamPtr &)
ReadStreamPtr Hdfs3OpenReadStream(const std::string &, const common::Range &)
std::basic_string< char, std::char_traits< char >, Allocator< char > > string
string with Manager tracking
static constexpr bool debug
void SysGlob(const std::string &path, const GlobType &gtype, FileList &filelist)
Glob a path and augment the FileList with matching file names.
WriteStreamPtr MakeGZipWriteFilter(const WriteStreamPtr &)
bool IsCompressed(const std::string &path)
High-performance smart pointer used as a wrapping reference counting pointer.
WriteStreamPtr S3OpenWriteStream(const std::string &)
ReadStreamPtr SysOpenReadStream(const std::string &path, const common::Range &range)
Open file for reading and return file descriptor.
List of file info and additional overall info.
bool IsRemoteUri(const std::string &path)
Returns true if the file at the given path is a remote URI such as s3:// or hdfs://.
std::ostream & operator<<(std::ostream &os, const Type &t)
std::string ssnprintf(size_t max_size, const char *fmt,...)
Helper for returning the result of a snprintf() call inside a std::string.
WriteStreamPtr MakeBZip2WriteFilter(const WriteStreamPtr &)
bool contains_compressed
whether the list contains a compressed file.
WriteStreamPtr OpenWriteStream(const std::string &path)
void Hdfs3Glob(const std::string &, const GlobType &, FileList &)