Line data Source code
1 : /* Copyright (C) 2022 Wildfire Games.
2 : *
3 : * Permission is hereby granted, free of charge, to any person obtaining
4 : * a copy of this software and associated documentation files (the
5 : * "Software"), to deal in the Software without restriction, including
6 : * without limitation the rights to use, copy, modify, merge, publish,
7 : * distribute, sublicense, and/or sell copies of the Software, and to
8 : * permit persons to whom the Software is furnished to do so, subject to
9 : * the following conditions:
10 : *
11 : * The above copyright notice and this permission notice shall be included
12 : * in all copies or substantial portions of the Software.
13 : *
14 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 : * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 : * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 : * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 : * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 : * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 : * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 : */
22 :
23 : /*
24 : * Path string class, similar to boost::filesystem::basic_path.
25 : */
26 :
27 : // notes:
28 : // - this module is independent of lib/file so that it can be used from
29 : // other code without pulling in the entire file manager.
30 : // - there is no restriction on buffer lengths except the underlying OS.
31 : // input buffers must not exceed PATH_MAX chars, while outputs
32 : // must hold at least that much.
33 : // - unless otherwise mentioned, all functions are intended to work with
34 : // native and VFS paths.
35 : // when reading, both '/' and SYS_DIR_SEP are accepted; '/' is written.
36 :
37 : #ifndef INCLUDED_PATH
38 : #define INCLUDED_PATH
39 :
40 : #include "lib/utf8.h"
41 :
42 : #include <algorithm>
43 : #include <cstring>
44 : #include <functional>
45 :
46 : namespace ERR // path-related Status codes (all negative values)
47 : {
48 : const Status PATH_CHARACTER_ILLEGAL = -100300;
49 : const Status PATH_CHARACTER_UNSAFE = -100301;
50 : const Status PATH_NOT_FOUND = -100302;
51 : const Status PATH_MIXED_SEPARATORS = -100303; // raised via DEBUG_WARN_ERR by Path when '/' and '\\' are mixed
52 : }
53 :
54 : /**
55 : * Check whether s2 is a subpath of s1, or vice versa (equal paths count as subpaths).
56 : *
57 : * @param s1, s2 the two path strings to compare
58 : * @return true if one of the paths is a subpath of the other
59 : **/
60 : bool path_is_subpath(const wchar_t* s1, const wchar_t* s2);
61 :
62 : /**
63 : * Get the name component of a path, i.e. the part after the last dir separator.
64 : * Skips over all characters up to the last dir separator, if any.
65 : *
66 : * @param path Input path.
67 : * @return pointer to the name component within \<path\> (no copy is made).
68 : **/
69 : const wchar_t* path_name_only(const wchar_t* path);
70 :
71 :
72 : // NB: there is a need for 'generic' paths (e.g. for Trace entry / archive pathnames).
73 : // converting between specialized variants via c_str would be inefficient, and the
74 : // Os/VfsPath types are hopefully sufficient to avoid errors.
75 236702 : class Path // wide-character path string that also remembers which dir separator ('/' or '\\') it uses
76 : {
77 : public:
78 : using String = std::wstring; // type of the stored path string
79 :
80 4650 : Path() // empty path; DetectSeparator() then selects '/'
81 4650 : {
82 4650 : DetectSeparator();
83 4650 : }
84 :
85 60576 : Path(const Path& p) // copies the string and re-derives the separator from it (p.separator itself is not copied)
86 60576 : : path(p.path)
87 : {
88 60576 : DetectSeparator();
89 60576 : }
90 :
91 6861 : Path(const char* p) // from a NUL-terminated narrow string; each byte becomes one wide character
92 13722 : : path((const unsigned char*)p, (const unsigned char*)p+strlen(p))
93 : // interpret bytes as unsigned; makes no difference for ASCII,
94 : // and ensures OsPath on Unix will only contain values 0 <= c < 0x100
95 : {
96 6861 : DetectSeparator();
97 6861 : }
98 :
99 16496 : Path(const wchar_t* p) // from a NUL-terminated wide string, copied unchanged
100 32992 : : path(p, p+wcslen(p))
101 : {
102 16496 : DetectSeparator();
103 16496 : }
104 :
105 12 : Path(const std::string& s) // each of the s.length() bytes (read as unsigned) becomes one wide character
106 48 : : path((const unsigned char*)s.c_str(), (const unsigned char*)s.c_str()+s.length())
107 : {
108 12 : DetectSeparator();
109 12 : }
110 :
111 63555 : Path(const std::wstring& s) // copies s unchanged
112 63555 : : path(s)
113 : {
114 63555 : DetectSeparator();
115 63555 : }
116 :
117 : Path& operator=(const Path& rhs) // copies rhs's string, then re-detects the separator from it
118 : {
119 5415 : path = rhs.path;
120 5415 : DetectSeparator(); // (warns if the assigned path mixes '/' and '\\')
121 5041 : return *this;
122 : }
123 :
124 : bool empty() const // true if the path string has no characters
125 : {
126 55882 : return path.empty();
127 : }
128 :
129 : const String& string() const // read-only reference to the stored wide string
130 : {
131 70006 : return path;
132 : }
133 :
134 : /**
135 : * Return a UTF-8 version of the path, in a human-readable but potentially
136 : * lossy form. It is *not* safe to take this string and construct a new
137 : * Path object from it (it may fail for some non-ASCII paths) - it should
138 : * only be used for displaying paths to users.
139 : */
140 2633 : std::string string8() const
141 : {
142 2633 : Status err; // NOTE(review): not initialized here; assumes wstring_from_utf8 always writes it before the check below - confirm
143 : #if !OS_WIN
144 : // On Unix, assume paths consisting of 8-bit characters saved in this wide string.
145 13165 : std::string spath(path.begin(), path.end());
146 :
147 : // Return it if it's valid UTF-8
148 2633 : wstring_from_utf8(spath, &err);
149 2633 : if(err == INFO::OK)
150 2633 : return spath;
151 :
152 : // Otherwise assume ISO-8859-1 and let utf8_from_wstring treat each character as a Unicode code point.
153 : #endif
154 : // On Windows, paths are UTF-16 strings. We don't support non-BMP characters so we can assume it's simply a wstring.
155 0 : return utf8_from_wstring(path, &err); // err is not inspected after this call
156 : }
157 :
158 : bool operator<(const Path& rhs) const // orders by the stored strings only (separator member is not compared)
159 : {
160 199192 : return path < rhs.path;
161 : }
162 :
163 : bool operator==(const Path& rhs) const // equal if the stored strings are equal
164 : {
165 12723 : return path == rhs.path;
166 : }
167 :
168 3430 : bool operator!=(const Path& rhs) const // negation of operator==
169 : {
170 3430 : return !operator==(rhs);
171 : }
172 :
173 : bool IsDirectory() const // true if the path is empty or its last character is the separator
174 : {
175 34203 : if(empty()) // (ensure length()-1 is safe)
176 : return true; // (the VFS root directory is represented as an empty string)
177 34168 : return path[path.length()-1] == separator;
178 : }
179 :
180 1622 : Path Parent() const // everything before the last separator (separator dropped); empty Path if there is none
181 : {
182 1622 : const size_t idxSlash = path.find_last_of(separator);
183 1622 : if(idxSlash == String::npos)
184 0 : return L"";
185 3244 : return path.substr(0, idxSlash);
186 : }
187 :
188 6295 : Path Filename() const // everything after the last separator; the whole path if there is none
189 : {
190 6295 : const size_t idxSlash = path.find_last_of(separator);
191 6295 : if(idxSlash == String::npos)
192 2141 : return path;
193 8308 : return path.substr(idxSlash+1);
194 : }
195 :
196 804 : Path Basename() const // Filename() up to (not including) its last '.'; all of Filename() if it has no '.'
197 : {
198 2412 : const Path filename = Filename();
199 804 : const size_t idxDot = filename.string().find_last_of('.');
200 804 : if(idxDot == String::npos)
201 18 : return filename;
202 1563 : return filename.string().substr(0, idxDot);
203 : }
204 :
205 : // (Path return type allows callers to use our operator==)
206 5012 : Path Extension() const // Filename() from its last '.' onwards (dot included); empty Path if it has no '.'
207 : {
208 15036 : const Path filename = Filename();
209 5012 : const size_t idxDot = filename.string().find_last_of('.');
210 5012 : if(idxDot == String::npos)
211 0 : return Path();
212 7206 : return filename.string().substr(idxDot);
213 : }
214 :
215 795 : Path ChangeExtension(Path extension) const // Parent() / (Basename() + extension); extension is appended verbatim, no '.' is inserted
216 : {
217 3975 : return Parent() / Path(Basename().string() + extension.string());
218 : }
219 :
220 18373 : Path operator/(Path rhs) const // join: copy of this, plus a separator unless IsDirectory(), plus rhs
221 : {
222 18373 : Path ret = *this;
223 36746 : if(ret.path.empty()) // (empty paths assume '/', so take rhs's separator instead)
224 35 : ret.separator = rhs.separator;
225 18373 : if(!ret.IsDirectory())
226 5791 : ret.path += ret.separator;
227 :
228 18373 : if(rhs.path.find((ret.separator == '/')? '\\' : '/') != String::npos) // rhs contains the other separator: warn, but still append
229 : {
230 0 : PrintToDebugOutput();
231 0 : rhs.PrintToDebugOutput();
232 0 : DEBUG_WARN_ERR(ERR::PATH_MIXED_SEPARATORS);
233 : }
234 18373 : ret.path += rhs.path;
235 18373 : return ret;
236 : }
237 :
238 : /**
239 : * Return the part of this path before the last occurrence of other (empty if either path is empty or other is not found)
240 : * @param other Indicates the start of the path which should be removed
241 : * @note other should be a VfsPath, while this should be an OsPath
242 : */
243 379 : Path BeforeCommon(Path other) const
244 : {
245 1137 : Path ret = *this; // NOTE(review): ret is only read below, so this copy looks redundant
246 758 : if(ret.empty() || other.empty())
247 0 : return L"";
248 :
249 : // Convert the separator to allow for string comparison
250 379 : if(other.separator != ret.separator)
251 0 : std::replace(other.path.begin(), other.path.end(), other.separator, ret.separator);
252 :
253 379 : const size_t idx = ret.path.rfind(other.path); // last occurrence of other within this path
254 379 : if(idx == String::npos)
255 0 : return L"";
256 :
257 758 : return path.substr(0, idx); // (path holds the same string as ret.path)
258 : }
259 :
260 : static Status Validate(String::value_type c); // only declared in this header
261 :
262 : private:
263 0 : void PrintToDebugOutput() const // debug_printf the string8() form of the path and its separator
264 : {
265 0 : debug_printf("Path %s, separator %c\n", string8().c_str(), (char)separator);
266 0 : }
267 :
268 157575 : void DetectSeparator() // sets separator from the path contents; warns if both '/' and '\\' occur
269 : {
270 157575 : const size_t idxBackslash = path.find('\\');
271 :
272 157575 : if(path.find('/') != String::npos && idxBackslash != String::npos)
273 : {
274 0 : PrintToDebugOutput();
275 0 : DEBUG_WARN_ERR(ERR::PATH_MIXED_SEPARATORS);
276 : }
277 :
278 : // (any backslash selects '\\'; default to '/' otherwise, including for empty strings)
279 157575 : separator = (idxBackslash == String::npos)? '/' : '\\';
280 157575 : }
281 :
282 : String path;
283 :
284 : // note: ideally, path strings would only contain '/' or even SYS_DIR_SEP.
285 : // however, Windows-specific code (e.g. the sound driver detection)
286 : // uses these routines with '\\' strings. the boost::filesystem approach of
287 : // converting them all to '/' and then back via external_file_string is
288 : // annoying and inefficient. we allow either type of separators,
289 : // appending '\\' if the path contains one and '/' otherwise. when joining paths,
290 : // we warn if the appended part uses the other separator.
291 : wchar_t separator = L'/';
292 : };
293 :
294 : static inline std::wostream& operator<<(std::wostream& s, const Path& path) // writes the path's wide string to the stream
295 : {
296 : s << path.string();
297 : return s;
298 : }
299 :
300 : static inline std::wistream& operator>>(std::wistream& s, Path& path) // extracts one wide string from the stream and assigns it as the path
301 : {
302 : Path::String string;
303 : s >> string;
304 : path = Path(string); // assigned without checking the stream state
305 : return s;
306 : }
307 :
308 : namespace std
309 : {
310 : template<>
311 : struct hash<Path> // std::hash specialization for Path
312 : {
313 0 : std::size_t operator()(const Path& path) const // hash of the path's wide string; the separator member is not hashed
314 : {
315 193 : return m_StringHash(path.string());
316 : }
317 :
318 : private:
319 : std::hash<std::wstring> m_StringHash; // hasher the call operator delegates to
320 : };
321 : }
322 :
323 : #endif // #ifndef INCLUDED_PATH
|