Thrill  0.1
simple_glob.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * thrill/vfs/simple_glob.hpp
3  *
4  * A sane, simple, portable implementation of glob(). Used only on Windows.
5  *
6  * copied from SimpleOpt/SimpleGlob under MIT License
7  * Copyright (c) 2006-2013, Brodie Thiesfield
8  *
9  * Part of Project Thrill - http://project-thrill.org
10  *
11  * Copyright (C) 2015 Timo Bingmann <[email protected]>
12  *
13  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
14  ******************************************************************************/
15 
16 #pragma once
17 #ifndef THRILL_VFS_SIMPLE_GLOB_HEADER
18 #define THRILL_VFS_SIMPLE_GLOB_HEADER
19 
20 // ---------------------------------------------------------------------------
21 // Platform dependent implementations
22 
23 // if we aren't on Windows and we have ICU available, then enable ICU
24 // by default. Define this to 0 to intentionally disable it.
25 #ifndef SG_HAVE_ICU
26 # if !defined(_WIN32) && defined(USTRING_H)
27 # define SG_HAVE_ICU 1
28 # else
29 # define SG_HAVE_ICU 0
30 # endif
31 #endif
32 
33 // don't include this in documentation as it isn't relevant
34 #ifndef DOXYGEN
35 
36 // on Windows we want to use MBCS aware string functions and mimic the
37 // Unix glob functionality. On Unix we just use glob.
38 #ifdef _WIN32
39 # include <mbstring.h>
40 # include <windows.h>
41 # define sg_strchr ::_mbschr
42 # define sg_strrchr ::_mbsrchr
43 # define sg_strlen ::_mbslen
44 # if __STDC_WANT_SECURE_LIB__
45 # define sg_strcpy_s(a, n, b) ::_mbscpy_s(a, n, b)
46 # else
47 # define sg_strcpy_s(a, n, b) ::_mbscpy(a, b)
48 # endif
49 # define sg_strcmp ::_mbscmp
50 # define sg_strcasecmp ::_mbsicmp
51 # define SOCHAR_T unsigned char
52 #else
53 # include <climits>
54 # include <glob.h> // NOLINT
55 # include <sys/stat.h> // NOLINT
56 # include <sys/types.h> // NOLINT
57 # define MAX_PATH PATH_MAX
58 # define sg_strchr ::strchr
59 # define sg_strrchr ::strrchr
60 # define sg_strlen ::strlen
61 # define sg_strcpy_s(a, n, b) ::strcpy(a, b) // NOLINT
62 # define sg_strcmp ::strcmp
63 # define sg_strcasecmp ::strcasecmp
64 # define SOCHAR_T char
65 #endif
66 
67 #include <wchar.h>
68 
69 #include <cstdlib>
70 #include <cstring>
71 
72 // use assertions to test the input data
73 #ifdef _DEBUG
74 # ifdef _MSC_VER
75 # include <crtdbg.h>
76 # define SG_ASSERT(b) _ASSERTE(b)
77 # else
78 # include <cassert>
79 # define SG_ASSERT(b) assert(b)
80 # endif
81 #else
82 # define SG_ASSERT(b)
83 #endif
84 
85 namespace thrill {
86 namespace vfs {
87 namespace glob_local {
88 
89 /*! \file simple_glob.hpp
90 
91  \version 3.6
92 
93  \brief A cross-platform file globbing library providing the ability to
94  expand wildcards in command-line arguments to a list of all matching
95  files. It is designed explicitly to be portable to any platform and has
96  been tested on Windows and Linux. See CSimpleGlobTempl for the class
97  definition.
98 
99  \section features FEATURES
100  - MIT Licence allows free use in all software (including GPL and
101  commercial)
102  - multi-platform (Windows 95/98/ME/NT/2K/XP, Linux, Unix)
103  - supports most of the standard linux glob() options
104  - recognition of a forward slash as equivalent to a backward slash
105  on Windows. e.g. "c:/path/foo*" is equivalent to "c:\path\foo*".
106  - implemented with only a single C++ header file
107  - char, wchar_t and Windows TCHAR in the same program
108  - complete working examples included
109  - compiles cleanly at warning level 4 (Windows/VC.NET 2003),
110  warning level 3 (Windows/VC6) and -Wall (Linux/gcc)
111 
112  \section usage USAGE
113  The SimpleGlob class is used by following these steps:
114  <ol>
115  <li> Include the SimpleGlob.h header file
116 
117  <pre>
118  \#include "SimpleGlob.h"
119  </pre>
120 
121  <li> Instantiate a CSimpleGlob object supplying the appropriate flags.
122 
123  <pre>
124  CSimpleGlob glob(FLAGS);
125  </pre>
126 
127  <li> Add all file specifications to the glob class.
128 
129  <pre>
130  glob.Add("file*");
131  glob.Add(argc, argv);
132  </pre>
133 
134  <li> Process all files with File(), Files() and FileCount()
135 
136  <pre>
137  for (int n = 0; n < glob.FileCount(); ++n) {
138  ProcessFile(glob.File(n));
139  }
140  </pre>
141 
142  </ol>
143 
144  \section licence MIT LICENCE
145 <pre>
146  The licence text below is the boilerplate "MIT Licence" used from:
147  http://www.opensource.org/licenses/mit-license.php
148 
149  Copyright (c) 2006-2013, Brodie Thiesfield
150 
151  Permission is hereby granted, free of charge, to any person obtaining a
152  copy of this software and associated documentation files (the "Software"),
153  to deal in the Software without restriction, including without limitation
154  the rights to use, copy, modify, merge, publish, distribute, sublicense,
155  and/or sell copies of the Software, and to permit persons to whom the
156  Software is furnished to do so, subject to the following conditions:
157 
158  The above copyright notice and this permission notice shall be included
159  in all copies or substantial portions of the Software.
160 
161  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
162  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
163  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
164  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
165  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
166  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
167  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
168 </pre>
169 */
170 
171 /*! \brief The operation of SimpleGlob is fine-tuned via the use of a
172  combination of the following flags.
173 
174  The flags may be passed at initialization of the class and used for every
175  filespec added, or alternatively they may optionally be specified in the
176  call to Add() and be different for each filespec.
177 
178  \param SG_GLOB_ERR
179  Return upon read error (e.g. directory does not have read permission)
180 
181  \param SG_GLOB_MARK
182  Append a slash (backslash in Windows) to every path which corresponds
183  to a directory
184 
185  \param SG_GLOB_NOSORT
186  By default, files are returned sorted into string order. With this
187  flag, no sorting is done. This is not compatible with
188  SG_GLOB_FULLSORT.
189 
190  \param SG_GLOB_FULLSORT
191  By default, files are sorted in groups belonging to each filespec that
192  was added. For example if the filespec "b*" was added before the
193  filespec "a*" then the argv array will contain all b* files sorted in
194  order, followed by all a* files sorted in order. If this flag is
195  specified, the entire array will be sorted ignoring the filespec
196  groups.
197 
198  \param SG_GLOB_NOCHECK
199  If the pattern doesn't match anything, return the original pattern.
200 
201  \param SG_GLOB_TILDE
202  Tilde expansion is carried out (on Unix platforms)
203 
204  \param SG_GLOB_ONLYDIR
205  Return only directories which match (not compatible with
206  SG_GLOB_ONLYFILE)
207 
208  \param SG_GLOB_ONLYFILE
209  Return only files which match (not compatible with SG_GLOB_ONLYDIR)
210 
211  \param SG_GLOB_NODOT
212  Do not return the "." or ".." special directories.
213  */
enum SG_Flags {
    SG_GLOB_ERR      = 1 << 0,  //!< return upon read error
    SG_GLOB_MARK     = 1 << 1,  //!< append a path separator to directories
    SG_GLOB_NOSORT   = 1 << 2,  //!< do not sort the matches
    SG_GLOB_NOCHECK  = 1 << 3,  //!< return the pattern itself if nothing matches
    SG_GLOB_TILDE    = 1 << 4,  //!< perform tilde expansion (Unix platforms)
    SG_GLOB_ONLYDIR  = 1 << 5,  //!< return only matching directories
    SG_GLOB_ONLYFILE = 1 << 6,  //!< return only matching files
    SG_GLOB_NODOT    = 1 << 7,  //!< skip the "." and ".." special directories
    SG_GLOB_FULLSORT = 1 << 8   //!< sort the whole array, ignoring filespec groups
};
225 
226 /*! \brief Error return codes */
enum SG_Error {
    SG_SUCCESS     =  0,  //!< matching files were added
    SG_ERR_NOMATCH =  1,  //!< nothing matched; positive so ">= SG_SUCCESS" ignores it
    SG_ERR_MEMORY  = -1,  //!< out of memory
    SG_ERR_FAILURE = -2   //!< general failure
};
233 
234 /*! \brief String manipulation functions. */
236 {
237 public:
238  static const char * strchr(const char* s, char c) {
239  return reinterpret_cast<const char*>(
240  sg_strchr(reinterpret_cast<const SOCHAR_T*>(s), c));
241  }
242  static const wchar_t * strchr(const wchar_t* s, wchar_t c) {
243  return ::wcschr(s, c);
244  }
245 #if SG_HAVE_ICU
246  static const UChar * strchr(const UChar* s, UChar c) {
247  return ::u_strchr(s, c);
248  }
249 #endif
250 
251  static const char * strrchr(const char* s, char c) {
252  return reinterpret_cast<const char*>(
253  sg_strrchr(reinterpret_cast<const SOCHAR_T*>(s), c));
254  }
255  static const wchar_t * strrchr(const wchar_t* s, wchar_t c) {
256  return ::wcsrchr(s, c);
257  }
258 #if SG_HAVE_ICU
259  static const UChar * strrchr(const UChar* s, UChar c) {
260  return ::u_strrchr(s, c);
261  }
262 #endif
263 
264  // Note: char strlen returns number of bytes, not characters
265  static size_t strlen(const char* s) { return ::strlen(s); }
266  static size_t strlen(const wchar_t* s) { return ::wcslen(s); }
267 #if SG_HAVE_ICU
268  static size_t strlen(const UChar* s) { return ::u_strlen(s); }
269 #endif
270 
271  static void strcpy_s(char* dst, size_t n, const char* src) {
272  (void)n;
273  sg_strcpy_s(reinterpret_cast<SOCHAR_T*>(dst), n,
274  reinterpret_cast<const SOCHAR_T*>(src));
275  }
276  static void strcpy_s(wchar_t* dst, size_t n, const wchar_t* src) {
277 # if __STDC_WANT_SECURE_LIB__
278  ::wcscpy_s(dst, n, src);
279 #else
280  (void)n;
281  ::wcscpy(dst, src);
282 #endif
283  }
284 #if SG_HAVE_ICU
285  static void strcpy_s(UChar* dst, size_t n, const UChar* src) {
286  ::u_strncpy(dst, src, n);
287  }
288 #endif
289 
290  static int strcmp(const char* s1, const char* s2) {
291  return sg_strcmp((const SOCHAR_T*)s1, (const SOCHAR_T*)s2);
292  }
293  static int strcmp(const wchar_t* s1, const wchar_t* s2) {
294  return ::wcscmp(s1, s2);
295  }
296 #if SG_HAVE_ICU
297  static int strcmp(const UChar* s1, const UChar* s2) {
298  return ::u_strcmp(s1, s2);
299  }
300 #endif
301 
302  static int strcasecmp(const char* s1, const char* s2) {
303  return sg_strcasecmp((const SOCHAR_T*)s1, (const SOCHAR_T*)s2);
304  }
305 #if _WIN32
306  static int strcasecmp(const wchar_t* s1, const wchar_t* s2) {
307  return ::_wcsicmp(s1, s2);
308  }
309 #endif // _WIN32
310 #if SG_HAVE_ICU
311  static int strcasecmp(const UChar* s1, const UChar* s2) {
312  return u_strcasecmp(s1, s2, 0);
313  }
314 #endif
315 };
316 
/*! \brief Classification of a path as reported by GetFileTypeS(). */
enum SG_FileType {
    SG_FILETYPE_INVALID,  //!< path does not exist or is neither file nor directory
    SG_FILETYPE_DIR,      //!< path is a directory
    SG_FILETYPE_FILE      //!< path is a regular file
};
322 
323 #ifdef _WIN32
324 
325 #ifndef INVALID_FILE_ATTRIBUTES
326 # define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
327 #endif
328 
329 #define SG_PATH_CHAR '\\'
330 
331 /*! \brief Windows glob implementation. */
332 template <typename SOCHAR>
333 class SimpleGlobBase
334 {
335 public:
336  SimpleGlobBase() : m_hFind(INVALID_HANDLE_VALUE) { }
337 
338  int FindFirstFileS(const char* a_pszFileSpec, unsigned int) {
339  m_hFind = FindFirstFileA(a_pszFileSpec, &m_oFindDataA);
340  if (m_hFind != INVALID_HANDLE_VALUE) {
341  return SG_SUCCESS;
342  }
343  DWORD dwErr = GetLastError();
344  if (dwErr == ERROR_FILE_NOT_FOUND) {
345  return SG_ERR_NOMATCH;
346  }
347  return SG_ERR_FAILURE;
348  }
349  int FindFirstFileS(const wchar_t* a_pszFileSpec, unsigned int) {
350  m_hFind = FindFirstFileW(a_pszFileSpec, &m_oFindDataW);
351  if (m_hFind != INVALID_HANDLE_VALUE) {
352  return SG_SUCCESS;
353  }
354  DWORD dwErr = GetLastError();
355  if (dwErr == ERROR_FILE_NOT_FOUND) {
356  return SG_ERR_NOMATCH;
357  }
358  return SG_ERR_FAILURE;
359  }
360 
361  bool FindNextFileS(char) { // NOLINT
362  return FindNextFileA(m_hFind, &m_oFindDataA) != FALSE;
363  }
364  bool FindNextFileS(wchar_t) { // NOLINT
365  return FindNextFileW(m_hFind, &m_oFindDataW) != FALSE;
366  }
367 
368  void FindDone() {
369  FindClose(m_hFind);
370  }
371 
372  const char * GetFileNameS(char) const { // NOLINT
373  return m_oFindDataA.cFileName;
374  }
375  const wchar_t * GetFileNameS(wchar_t) const { // NOLINT
376  return m_oFindDataW.cFileName;
377  }
378 
379  bool IsDirS(char) const { // NOLINT
380  return this->GetFileTypeS(m_oFindDataA.dwFileAttributes) == SG_FILETYPE_DIR;
381  }
382  bool IsDirS(wchar_t) const { // NOLINT
383  return this->GetFileTypeS(m_oFindDataW.dwFileAttributes) == SG_FILETYPE_DIR;
384  }
385 
386  SG_FileType GetFileTypeS(const char* a_pszPath) {
387  return this->GetFileTypeS(GetFileAttributesA(a_pszPath));
388  }
389  SG_FileType GetFileTypeS(const wchar_t* a_pszPath) {
390  return this->GetFileTypeS(GetFileAttributesW(a_pszPath));
391  }
392  SG_FileType GetFileTypeS(DWORD a_dwAttribs) const {
393  if (a_dwAttribs == INVALID_FILE_ATTRIBUTES) {
394  return SG_FILETYPE_INVALID;
395  }
396  if (a_dwAttribs & FILE_ATTRIBUTE_DIRECTORY) {
397  return SG_FILETYPE_DIR;
398  }
399  return SG_FILETYPE_FILE;
400  }
401 
402 private:
403  HANDLE m_hFind;
404  WIN32_FIND_DATAA m_oFindDataA;
405  WIN32_FIND_DATAW m_oFindDataW;
406 };
407 
408 #else // !_WIN32
409 
410 #define SG_PATH_CHAR '/'
411 
412 /*! \brief Unix glob implementation. */
413 template <typename SOCHAR>
415 {
416 public:
418  memset(&glob_, 0, sizeof(glob_));
419  ui_curr_ = (size_t)-1;
420  }
421 
423  globfree(&glob_);
424  }
425 
426  void FilePrep() {
427  b_isdir_ = false;
428  size_t len = strlen(glob_.gl_pathv[ui_curr_]);
429  if (glob_.gl_pathv[ui_curr_][len - 1] == '/') {
430  b_isdir_ = true;
431  glob_.gl_pathv[ui_curr_][len - 1] = 0;
432  }
433  }
434 
435  int FindFirstFileS(const char* a_pszFileSpec, unsigned int a_uiFlags) {
436  int nflags = GLOB_MARK | GLOB_NOSORT;
437  if (a_uiFlags & SG_GLOB_ERR) nflags |= GLOB_ERR;
438  if (a_uiFlags & SG_GLOB_TILDE) nflags |= GLOB_TILDE;
439  int rc = glob(a_pszFileSpec, nflags, nullptr, &glob_);
440  if (rc == GLOB_NOSPACE) return SG_ERR_MEMORY;
441  if (rc == GLOB_ABORTED) return SG_ERR_FAILURE;
442  if (rc == GLOB_NOMATCH) return SG_ERR_NOMATCH;
443  ui_curr_ = 0;
444  FilePrep();
445  return SG_SUCCESS;
446  }
447 
448 #if SG_HAVE_ICU
449  int FindFirstFileS(const UChar* a_pszFileSpec, unsigned int a_uiFlags) {
450  char buf[PATH_MAX] = { 0 };
451  UErrorCode status = U_ZERO_ERROR;
452  u_strToUTF8(buf, sizeof(buf), nullptr, a_pszFileSpec, -1, &status);
453  if (U_FAILURE(status)) return SG_ERR_FAILURE;
454  return this->FindFirstFileS(buf, a_uiFlags);
455  }
456 #endif
457 
458  bool FindNextFileS(char) { // NOLINT
459  SG_ASSERT(ui_curr_ != (size_t)-1);
460  if (++ui_curr_ >= glob_.gl_pathc) {
461  return false;
462  }
463  FilePrep();
464  return true;
465  }
466 
467 #if SG_HAVE_ICU
468  bool FindNextFileS(UChar) { // NOLINT
469  return this->FindNextFileS(static_cast<char>(0));
470  }
471 #endif
472 
473  void FindDone() {
474  globfree(&glob_);
475  memset(&glob_, 0, sizeof(glob_));
476  ui_curr_ = (size_t)-1;
477  }
478 
479  const char * GetFileNameS(char) const { // NOLINT
480  SG_ASSERT(ui_curr_ != (size_t)-1);
481  return glob_.gl_pathv[ui_curr_];
482  }
483 
484 #if SG_HAVE_ICU
485  const UChar * GetFileNameS(UChar) const { // NOLINT
486  const char* pszFile = this->GetFileNameS(static_cast<char>(0));
487  if (!pszFile) return nullptr;
488  UErrorCode status = U_ZERO_ERROR;
489  memset(m_szBuf, 0, sizeof(m_szBuf));
490  u_strFromUTF8(m_szBuf, PATH_MAX, nullptr, pszFile, -1, &status);
491  if (U_FAILURE(status)) return nullptr;
492  return m_szBuf;
493  }
494 #endif
495 
496  bool IsDirS(char) const { // NOLINT
497  SG_ASSERT(ui_curr_ != (size_t)-1);
498  return b_isdir_;
499  }
500 
501 #if SG_HAVE_ICU
502  bool IsDirS(UChar) const { // NOLINT
503  return this->IsDirS(static_cast<char>(0));
504  }
505 #endif
506 
507  SG_FileType GetFileTypeS(const char* a_pszPath) const {
508  struct stat sb;
509  if (0 != stat(a_pszPath, &sb)) {
510  return SG_FILETYPE_INVALID;
511  }
512  if (S_ISDIR(sb.st_mode)) {
513  return SG_FILETYPE_DIR;
514  }
515  if (S_ISREG(sb.st_mode)) {
516  return SG_FILETYPE_FILE;
517  }
518  return SG_FILETYPE_INVALID;
519  }
520 
521 #if SG_HAVE_ICU
522  SG_FileType GetFileTypeS(const UChar* a_pszPath) const {
523  char buf[PATH_MAX] = { 0 };
524  UErrorCode status = U_ZERO_ERROR;
525  u_strToUTF8(buf, sizeof(buf), nullptr, a_pszPath, -1, &status);
526  if (U_FAILURE(status)) return SG_FILETYPE_INVALID;
527  return this->GetFileTypeS(buf);
528  }
529 #endif
530 
531 private:
532  glob_t glob_;
533  size_t ui_curr_;
534  bool b_isdir_;
535 #if SG_HAVE_ICU
536  mutable UChar m_szBuf[PATH_MAX];
537 #endif
538 };
539 
540 #endif // _WIN32
541 
542 #endif // DOXYGEN
543 
544 // ---------------------------------------------------------------------------
545 // MAIN TEMPLATE CLASS
546 // ---------------------------------------------------------------------------
547 
548 /*! \brief Implementation of the SimpleGlob class */
549 template <typename SOCHAR>
550 class CSimpleGlobTempl : private SimpleGlobBase<SOCHAR>
551 {
552 public:
553  /*! \brief Initialize the class.
554 
555  \param a_uiFlags Combination of SG_GLOB flags.
556  \param a_nReservedSlots Number of slots in the argv array that
557  should be reserved. In the returned array these slots
558  argv[0] ... argv[a_nReservedSlots-1] will be left empty for
559  the caller to fill in.
560  */
561  explicit CSimpleGlobTempl(unsigned int a_uiFlags = 0, int a_nReservedSlots = 0);
562 
563  /*! \brief Deallocate all memory buffers. */
564  ~CSimpleGlobTempl();
565 
566  /*! \brief Initialize (or re-initialize) the class in preparation for
567  adding new filespecs.
568 
569  All existing files are cleared. Note that allocated memory is only
570  deallocated at object destruction.
571 
572  \param a_uiFlags Combination of SG_GLOB flags.
573  \param a_nReservedSlots Number of slots in the argv array that
574  should be reserved. In the returned array these slots
575  argv[0] ... argv[a_nReservedSlots-1] will be left empty for
576  the caller to fill in.
577  */
578  int Init(unsigned int a_uiFlags = 0, int a_nReservedSlots = 0);
579 
580  /*! \brief Add a new filespec to the glob.
581 
582  The filesystem will be immediately scanned for all matching files and
583  directories and they will be added to the glob.
584 
585  \param a_pszFileSpec Filespec to add to the glob.
586 
587  \return SG_SUCCESS Matching files were added to the glob.
588  \return SG_ERR_NOMATCH Nothing matched the pattern. To ignore this
589  error compare return value to >= SG_SUCCESS.
590  \return SG_ERR_MEMORY Out of memory failure.
591  \return SG_ERR_FAILURE General failure.
592  */
593  int Add(const SOCHAR* a_pszFileSpec);
594 
595  /*! \brief Add an array of filespec to the glob.
596 
597  The filesystem will be immediately scanned for all matching files and
598  directories in each filespec and they will be added to the glob.
599 
600  \param a_nCount Number of filespec in the array.
601  \param a_rgpszFileSpec Array of filespec to add to the glob.
602 
603  \return SG_SUCCESS Matching files were added to the glob.
604  \return SG_ERR_NOMATCH Nothing matched the pattern. To ignore this
605  error compare return value to >= SG_SUCCESS.
606  \return SG_ERR_MEMORY Out of memory failure.
607  \return SG_ERR_FAILURE General failure.
608  */
609  int Add(int a_nCount, const SOCHAR* const* a_rgpszFileSpec);
610 
611  /*! \brief Return the number of files in the argv array.
612  */
613  inline int FileCount() const { return m_nArgsLen; }
614 
615  /*! \brief Return the full argv array. */
616  inline SOCHAR ** Files() {
617  SetArgvArrayType(POINTERS);
618  return m_rgpArgs;
619  }
620 
621  /*! \brief Return the a single file. */
622  inline SOCHAR * File(int n) {
623  SG_ASSERT(n >= 0 && n < m_nArgsLen);
624  return Files()[n];
625  }
626 
627 private:
628  CSimpleGlobTempl(const CSimpleGlobTempl&); // disabled
629  CSimpleGlobTempl& operator = (const CSimpleGlobTempl&); // disabled
630 
631  /*! \brief The argv array has it's members stored as either an offset into
632  the string buffer, or as pointers to their string in the buffer. The
633  offsets are used because if the string buffer is dynamically resized,
634  all pointers into that buffer would become invalid.
635  */
636  enum ARG_ARRAY_TYPE { OFFSETS, POINTERS };
637 
638  /*! \brief Change the type of data stored in the argv array. */
639  void SetArgvArrayType(ARG_ARRAY_TYPE a_nNewType);
640 
641  /*! \brief Add a filename to the array if it passes all requirements. */
642  int AppendName(const SOCHAR* a_pszFileName, bool a_bIsDir);
643 
644  /*! \brief Grow the argv array to the required size. */
645  bool GrowArgvArray(int a_nNewLen);
646 
647  /*! \brief Grow the string buffer to the required size. */
648  bool GrowStringBuffer(size_t a_uiMinSize);
649 
650  /*! \brief Compare two (possible nullptr) strings */
651  static int fileSortCompare(const void* a1, const void* a2);
652 
653 private:
654  unsigned int m_uiFlags;
655  ARG_ARRAY_TYPE m_nArgArrayType; //!< argv is indexes or pointers
656  SOCHAR** m_rgpArgs; //!< argv
657  int m_nReservedSlots; //!< # client slots in argv array
658  int m_nArgsSize; //!< allocated size of array
659  int m_nArgsLen; //!< used length
660  SOCHAR* m_pBuffer; //!< argv string buffer
661  size_t m_uiBufferSize; //!< allocated size of buffer
662  size_t m_uiBufferLen; //!< used length of buffer
663  SOCHAR m_szPathPrefix[MAX_PATH]; //!< wildcard path prefix
664 };
665 
666 // ---------------------------------------------------------------------------
667 // IMPLEMENTATION
668 // ---------------------------------------------------------------------------
669 
670 template <typename SOCHAR>
672  unsigned int a_uiFlags,
673  int a_nReservedSlots
674  ) {
675  m_rgpArgs = nullptr;
676  m_nArgsSize = 0;
677  m_pBuffer = nullptr;
678  m_uiBufferSize = 0;
679 
680  Init(a_uiFlags, a_nReservedSlots);
681 }
682 
683 template <typename SOCHAR>
685  if (m_rgpArgs) free(m_rgpArgs);
686  if (m_pBuffer) free(m_pBuffer);
687 }
688 
689 template <typename SOCHAR>
690 int
692  unsigned int a_uiFlags,
693  int a_nReservedSlots
694  ) {
695  m_nArgArrayType = POINTERS;
696  m_uiFlags = a_uiFlags;
697  m_nArgsLen = a_nReservedSlots;
698  m_nReservedSlots = a_nReservedSlots;
699  m_uiBufferLen = 0;
700 
701  if (m_nReservedSlots > 0) {
702  if (!GrowArgvArray(m_nReservedSlots)) {
703  return SG_ERR_MEMORY;
704  }
705  for (int n = 0; n < m_nReservedSlots; ++n) {
706  m_rgpArgs[n] = nullptr;
707  }
708  }
709 
710  return SG_SUCCESS;
711 }
712 
713 template <typename SOCHAR>
714 int
716  const SOCHAR* a_pszFileSpec
717  ) {
718 #ifdef _WIN32
719  // Windows FindFirst/FindNext recognizes forward slash as the same as
720  // backward slash and follows the directories. We need to do the same
721  // when calculating the prefix and when we have no wildcards.
722  SOCHAR szFileSpec[MAX_PATH];
723  SimpleGlobUtil::strcpy_s(szFileSpec, MAX_PATH, a_pszFileSpec);
724  const SOCHAR* pszPath = SimpleGlobUtil::strchr(szFileSpec, '/');
725  while (pszPath) {
726  szFileSpec[pszPath - szFileSpec] = SG_PATH_CHAR;
727  pszPath = SimpleGlobUtil::strchr(pszPath + 1, '/');
728  }
729  a_pszFileSpec = szFileSpec;
730 #endif
731 
732  // if this doesn't contain wildcards then we can just add it directly
733  m_szPathPrefix[0] = 0;
734  if (!SimpleGlobUtil::strchr(a_pszFileSpec, '*') &&
735  !SimpleGlobUtil::strchr(a_pszFileSpec, '?'))
736  {
737  SG_FileType nType = this->GetFileTypeS(a_pszFileSpec);
738  if (nType == SG_FILETYPE_INVALID) {
739  if (m_uiFlags & SG_GLOB_NOCHECK) {
740  return AppendName(a_pszFileSpec, false);
741  }
742  return SG_ERR_NOMATCH;
743  }
744  return AppendName(a_pszFileSpec, nType == SG_FILETYPE_DIR);
745  }
746 
747 #ifdef _WIN32
748  // Windows doesn't return the directory with the filename, so we need to
749  // extract the path from the search string ourselves and prefix it to the
750  // filename we get back.
751  const SOCHAR* pszFilename =
752  SimpleGlobUtil::strrchr(a_pszFileSpec, SG_PATH_CHAR);
753  if (pszFilename) {
754  SimpleGlobUtil::strcpy_s(m_szPathPrefix, MAX_PATH, a_pszFileSpec);
755  m_szPathPrefix[pszFilename - a_pszFileSpec + 1] = 0;
756  }
757 #endif
758 
759  // search for the first match on the file
760  int rc = this->FindFirstFileS(a_pszFileSpec, m_uiFlags);
761  if (rc != SG_SUCCESS) {
762  if (rc == SG_ERR_NOMATCH && (m_uiFlags & SG_GLOB_NOCHECK)) {
763  int ok = AppendName(a_pszFileSpec, false);
764  if (ok != SG_SUCCESS) rc = ok;
765  }
766  return rc;
767  }
768 
769  // add it and find all subsequent matches
770  int nError, nStartLen = m_nArgsLen;
771  bool bSuccess;
772  do {
773  nError = AppendName(this->GetFileNameS((SOCHAR)0), this->IsDirS((SOCHAR)0));
774  bSuccess = this->FindNextFileS((SOCHAR)0);
775  }
776  while (nError == SG_SUCCESS && bSuccess); // NOLINT
778 
779  // sort these files if required
780  if (m_nArgsLen > nStartLen && !(m_uiFlags & SG_GLOB_NOSORT)) {
781  if (m_uiFlags & SG_GLOB_FULLSORT) {
782  nStartLen = m_nReservedSlots;
783  }
784  SetArgvArrayType(POINTERS);
785  qsort(
786  m_rgpArgs + nStartLen,
787  m_nArgsLen - nStartLen,
788  sizeof(m_rgpArgs[0]), fileSortCompare);
789  }
790 
791  return nError;
792 }
793 
794 template <typename SOCHAR>
795 int
797  int a_nCount,
798  const SOCHAR* const* a_rgpszFileSpec
799  ) {
800  int nResult;
801  for (int n = 0; n < a_nCount; ++n) {
802  nResult = Add(a_rgpszFileSpec[n]);
803  if (nResult != SG_SUCCESS) {
804  return nResult;
805  }
806  }
807  return SG_SUCCESS;
808 }
809 
810 template <typename SOCHAR>
811 int
813  const SOCHAR* a_pszFileName,
814  bool a_bIsDir
815  ) {
816  // we need the argv array as offsets in case we resize it
817  SetArgvArrayType(OFFSETS);
818 
819  // check for special cases which cause us to ignore this entry
820  if ((m_uiFlags & SG_GLOB_ONLYDIR) && !a_bIsDir) {
821  return SG_SUCCESS;
822  }
823  if ((m_uiFlags & SG_GLOB_ONLYFILE) && a_bIsDir) {
824  return SG_SUCCESS;
825  }
826  if ((m_uiFlags & SG_GLOB_NODOT) && a_bIsDir) {
827  if (a_pszFileName[0] == '.') {
828  if (a_pszFileName[1] == '\0') {
829  return SG_SUCCESS;
830  }
831  if (a_pszFileName[1] == '.' && a_pszFileName[2] == '\0') {
832  return SG_SUCCESS;
833  }
834  }
835  }
836 
837  // ensure that we have enough room in the argv array
838  if (!GrowArgvArray(m_nArgsLen + 1)) {
839  return SG_ERR_MEMORY;
840  }
841 
842  // ensure that we have enough room in the string buffer (+1 for null)
843  size_t uiPrefixLen = SimpleGlobUtil::strlen(m_szPathPrefix);
844  size_t uiLen = uiPrefixLen + SimpleGlobUtil::strlen(a_pszFileName) + 1;
845  if (a_bIsDir && (m_uiFlags & SG_GLOB_MARK) == SG_GLOB_MARK) {
846  ++uiLen; // need space for the backslash
847  }
848  if (!GrowStringBuffer(m_uiBufferLen + uiLen)) {
849  return SG_ERR_MEMORY;
850  }
851 
852  // add this entry. m_uiBufferLen is offset from beginning of buffer.
853  m_rgpArgs[m_nArgsLen++] = reinterpret_cast<SOCHAR*>(m_uiBufferLen);
854  SimpleGlobUtil::strcpy_s(m_pBuffer + m_uiBufferLen,
855  m_uiBufferSize - m_uiBufferLen, m_szPathPrefix);
856  SimpleGlobUtil::strcpy_s(m_pBuffer + m_uiBufferLen + uiPrefixLen,
857  m_uiBufferSize - m_uiBufferLen - uiPrefixLen, a_pszFileName);
858  m_uiBufferLen += uiLen;
859 
860  // add the directory slash if desired
861  if (a_bIsDir && (m_uiFlags & SG_GLOB_MARK) == SG_GLOB_MARK) {
862  static const SOCHAR szDirSlash[] = { SG_PATH_CHAR, 0 };
863  SimpleGlobUtil::strcpy_s(m_pBuffer + m_uiBufferLen - 2,
864  m_uiBufferSize - (m_uiBufferLen - 2), szDirSlash);
865  }
866 
867  return SG_SUCCESS;
868 }
869 
870 template <typename SOCHAR>
871 void
873  ARG_ARRAY_TYPE a_nNewType
874  ) {
875  if (m_nArgArrayType == a_nNewType) return;
876  if (a_nNewType == POINTERS) {
877  SG_ASSERT(m_nArgArrayType == OFFSETS);
878  for (int n = 0; n < m_nArgsLen; ++n) {
879  m_rgpArgs[n] = (m_rgpArgs[n] == reinterpret_cast<SOCHAR*>(-1)) ?
880  nullptr : m_pBuffer + (size_t)m_rgpArgs[n];
881  }
882  }
883  else {
884  SG_ASSERT(a_nNewType == OFFSETS);
885  SG_ASSERT(m_nArgArrayType == POINTERS);
886  for (int n = 0; n < m_nArgsLen; ++n) {
887  m_rgpArgs[n] = (m_rgpArgs[n] == nullptr) ?
888  reinterpret_cast<SOCHAR*>(-1) :
889  reinterpret_cast<SOCHAR*>(m_rgpArgs[n] - m_pBuffer);
890  }
891  }
892  m_nArgArrayType = a_nNewType;
893 }
894 
895 template <typename SOCHAR>
896 bool
898  int a_nNewLen
899  ) {
900  if (a_nNewLen >= m_nArgsSize) {
901  static const int SG_ARGV_INITIAL_SIZE = 32;
902  int nNewSize = (m_nArgsSize > 0) ?
903  m_nArgsSize * 2 : SG_ARGV_INITIAL_SIZE;
904  while (a_nNewLen >= nNewSize) {
905  nNewSize *= 2;
906  }
907  void* pNewBuffer = realloc(m_rgpArgs, nNewSize * sizeof(SOCHAR*));
908  if (!pNewBuffer) return false;
909  m_nArgsSize = nNewSize;
910  m_rgpArgs = reinterpret_cast<SOCHAR**>(pNewBuffer);
911  }
912  return true;
913 }
914 
915 template <typename SOCHAR>
916 bool
918  size_t a_uiMinSize
919  ) {
920  if (a_uiMinSize >= m_uiBufferSize) {
921  static const int SG_BUFFER_INITIAL_SIZE = 1024;
922  size_t uiNewSize = (m_uiBufferSize > 0) ?
923  m_uiBufferSize * 2 : SG_BUFFER_INITIAL_SIZE;
924  while (a_uiMinSize >= uiNewSize) {
925  uiNewSize *= 2;
926  }
927  void* pNewBuffer = realloc(m_pBuffer, uiNewSize * sizeof(SOCHAR));
928  if (!pNewBuffer) return false;
929  m_uiBufferSize = uiNewSize;
930  m_pBuffer = reinterpret_cast<SOCHAR*>(pNewBuffer);
931  }
932  return true;
933 }
934 
935 template <typename SOCHAR>
936 int
938  const void* a1,
939  const void* a2
940  ) {
941  const SOCHAR* s1 = *(const SOCHAR**)a1;
942  const SOCHAR* s2 = *(const SOCHAR**)a2;
943  if (s1 && s2) {
944  return SimpleGlobUtil::strcasecmp(s1, s2);
945  }
946  // nullptr sorts first
947  return s1 == s2 ? 0 : (s1 ? 1 : -1);
948 }
949 
950 // ---------------------------------------------------------------------------
951 // TYPE DEFINITIONS
952 // ---------------------------------------------------------------------------
953 
954 /*! \brief ASCII/MBCS version of CSimpleGlob */
956 
957 /*! \brief wchar_t version of CSimpleGlob */
959 
960 #if SG_HAVE_ICU
961 /*! \brief UChar version of CSimpleGlob */
962 using CSimpleGlobU = CSimpleGlobTempl<UChar>;
963 #endif
964 
965 #ifdef _UNICODE
966 /*! \brief TCHAR version dependent on if _UNICODE is defined */
967 # if SG_HAVE_ICU
968 # define CSimpleGlob CSimpleGlobU
969 # else
970 # define CSimpleGlob CSimpleGlobW
971 # endif
972 #else
973 /*! \brief TCHAR version dependent on if _UNICODE is defined */
974 # define CSimpleGlob CSimpleGlobA
975 #endif
976 
977 } // namespace glob_local
978 } // namespace vfs
979 } // namespace thrill
980 
981 #endif // !THRILL_VFS_SIMPLE_GLOB_HEADER
982 
983 /******************************************************************************/
int m_nArgsSize
allocated size of array
const char * GetFileNameS(char) const
static const char * strrchr(const char *s, char c)
static void strcpy_s(char *dst, size_t n, const char *src)
static int strcasecmp(const char *s1, const char *s2)
~CSimpleGlobTempl()
Deallocate all memory buffers.
#define SG_PATH_CHAR
static size_t strlen(const char *s)
ARG_ARRAY_TYPE m_nArgArrayType
argv is indexes or pointers
int FindFirstFileS(const char *a_pszFileSpec, unsigned int a_uiFlags)
SOCHAR * m_pBuffer
argv string buffer
size_t m_uiBufferLen
used length of buffer
bool GrowArgvArray(int a_nNewLen)
Grow the argv array to the required size.
int AppendName(const SOCHAR *a_pszFileName, bool a_bIsDir)
Add a filename to the array if it passes all requirements.
SG_Error
Error return codes.
#define MAX_PATH
Definition: simple_glob.hpp:57
int Add(const SOCHAR *a_pszFileSpec)
Add a new filespec to the glob.
SOCHAR ** Files()
Return the full argv array.
#define sg_strcasecmp
Definition: simple_glob.hpp:63
#define sg_strrchr
Definition: simple_glob.hpp:59
Implementation of the SimpleGlob class.
static int strcmp(const wchar_t *s1, const wchar_t *s2)
size_t m_uiBufferSize
allocated size of buffer
#define sg_strcmp
Definition: simple_glob.hpp:62
#define sg_strchr
Definition: simple_glob.hpp:58
String manipulation functions.
int m_nReservedSlots
client slots in argv array
#define SG_ASSERT(b)
Definition: simple_glob.hpp:82
static const wchar_t * strrchr(const wchar_t *s, wchar_t c)
void SetArgvArrayType(ARG_ARRAY_TYPE a_nNewType)
Change the type of data stored in the argv array.
int FileCount() const
Return the number of files in the argv array.
static int strcmp(const char *s1, const char *s2)
static size_t strlen(const wchar_t *s)
SOCHAR * File(int n)
Return the a single file.
SG_Flags
The operation of SimpleGlob is fine-tuned via the use of a combination of the following flags...
static void strcpy_s(wchar_t *dst, size_t n, const wchar_t *src)
ARG_ARRAY_TYPE
The argv array has its members stored as either an offset into the string buffer, or as pointers to their string in the buffer.
int Init(unsigned int a_uiFlags=0, int a_nReservedSlots=0)
Initialize (or re-initialize) the class in preparation for adding new filespecs.
static const wchar_t * strchr(const wchar_t *s, wchar_t c)
SG_FileType GetFileTypeS(const char *a_pszPath) const
#define sg_strcpy_s(a, n, b)
Definition: simple_glob.hpp:61
#define SOCHAR_T
Definition: simple_glob.hpp:64
void free(void *ptr) NOEXCEPT
exported free symbol that overrides loading from libc
CSimpleGlobTempl(unsigned int a_uiFlags=0, int a_nReservedSlots=0)
Initialize the class.
bool GrowStringBuffer(size_t a_uiMinSize)
Grow the string buffer to the required size.
void * realloc(void *ptr, size_t size) NOEXCEPT
exported realloc() symbol that overrides loading from libc
static const char * strchr(const char *s, char c)
static int fileSortCompare(const void *a1, const void *a2)
Compare two (possible nullptr) strings.