GNU Octave  9.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
lo-sysdep.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 1996-2024 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 #if defined (HAVE_CONFIG_H)
27 # include "config.h"
28 #endif
29 
30 #include <cstdlib>
31 #include <locale>
32 #include <codecvt>
33 
34 #include "dir-ops.h"
35 #include "file-ops.h"
36 #include "file-stat.h"
37 #include "lo-error.h"
38 #include "lo-sysdep.h"
39 #include "localcharset-wrapper.h"
40 #include "putenv-wrapper.h"
41 #include "unistd-wrappers.h"
42 #include "unsetenv-wrapper.h"
43 
44 #if defined (OCTAVE_USE_WINDOWS_API)
45 # include <windows.h>
46 # include <wchar.h>
47 
48 # include "filepos-wrappers.h"
49 # include "lo-hash.h"
50 # include "oct-locbuf.h"
51 # include "uniconv-wrappers.h"
52 # include "unwind-prot.h"
53 #endif
54 
56 
58 
// Execute CMD_STR with the C runtime's command processor and return
// whatever status that call reports.  With the Windows API the command
// is converted from UTF-8 to a wide string first.
int
system (const std::string& cmd_str)
{
#if ! defined (OCTAVE_USE_WINDOWS_API)
  return ::system (cmd_str.c_str ());
#else
  // The temporary wide string lives until the end of the full expression.
  return _wsystem (u8_to_wstring (cmd_str).c_str ());
#endif
}
70 
71 std::string
73 {
74  std::string retval;
75 
76 #if defined (OCTAVE_USE_WINDOWS_API)
77  wchar_t *tmp = _wgetcwd (nullptr, 0);
78 
79  if (! tmp)
80  (*current_liboctave_error_handler) ("unable to find current directory");
81 
82  std::wstring tmp_wstr (tmp);
83  free (tmp);
84 
85  std::string tmp_str = u8_from_wstring (tmp_wstr);
86 
87  retval = tmp_str;
88 
89 #else
90  // Using octave_getcwd_wrapper ensures that we have a getcwd that
91  // will allocate a buffer as large as necessary if buf and size are
92  // both 0.
93 
94  char *tmp = octave_getcwd_wrapper (nullptr, 0);
95 
96  if (! tmp)
97  (*current_liboctave_error_handler) ("unable to find current directory");
98 
99  retval = tmp;
100  free (tmp);
101 #endif
102 
103  return retval;
104 }
105 
106 int
107 chdir (const std::string& path_arg)
108 {
109  std::string path = sys::file_ops::tilde_expand (path_arg);
110 
111 #if defined (OCTAVE_USE_WINDOWS_API)
112  if (path.length () == 2 && path[1] == ':')
113  path += '\\';
114 #endif
115 
116  return octave_chdir_wrapper (path.c_str ());
117 }
118 
119 bool
120 get_dirlist (const std::string& dirname, string_vector& dirlist,
121  std::string& msg)
122 {
123  dirlist = "";
124  msg = "";
125 #if defined (OCTAVE_USE_WINDOWS_API)
126  _WIN32_FIND_DATAW ffd;
127 
128  std::string path_name (dirname);
129  if (path_name.empty ())
130  return true;
131 
132  if (path_name.back () == '\\' || path_name.back () == '/')
133  path_name.push_back ('*');
134  else
135  path_name.append (R"(\*)");
136 
137  // Find first file in directory.
138  std::wstring wpath_name = u8_to_wstring (path_name);
139  HANDLE hFind = FindFirstFileW (wpath_name.c_str (), &ffd);
140  if (INVALID_HANDLE_VALUE == hFind)
141  {
142  DWORD errCode = GetLastError ();
143  char *errorText = nullptr;
144  FormatMessageA (FORMAT_MESSAGE_FROM_SYSTEM |
145  FORMAT_MESSAGE_ALLOCATE_BUFFER |
146  FORMAT_MESSAGE_IGNORE_INSERTS,
147  nullptr, errCode,
148  MAKELANGID (LANG_NEUTRAL, SUBLANG_DEFAULT),
149  reinterpret_cast <char *> (&errorText), 0, nullptr);
150  if (errorText != nullptr)
151  {
152  msg = std::string (errorText);
153  LocalFree (errorText);
154  }
155  return false;
156  }
157 
158  std::list<std::string> dirlist_str;
159  do
160  dirlist_str.push_back (u8_from_wstring (ffd.cFileName));
161  while (FindNextFileW (hFind, &ffd) != 0);
162 
163  FindClose(hFind);
164 
165  dirlist = string_vector (dirlist_str);
166 
167 #else
168 
169  dir_entry dir (dirname);
170 
171  if (! dir)
172  {
173  msg = dir.error ();
174  return false;
175  }
176 
177  dirlist = dir.read ();
178 
179  dir.close ();
180 #endif
181 
182  return true;
183 }
184 
185 #if defined (OCTAVE_USE_WINDOWS_API)
186 
187 static bool
188 check_fseek_ftell_workaround_needed (bool set_nonbuffered_mode)
189 {
190  // To check whether the workaround is needed:
191  //
192  // * Create a tmp file with LF line endings only.
193  //
194  // * Open that file for reading in text mode.
195  //
196  // * Read a line.
197  //
198  // * Use ftello to record the position of the beginning of the
199  // second line.
200  //
201  // * Read and save the contents of the second line.
202  //
203  // * Use fseeko to return to the saved position.
204  //
205  // * Read the second line again and compare to the previously
206  // saved text.
207  //
208  // * If the lines are different, we need to set non-buffered
209  // input mode for files opened in text mode.
210 
211  std::string tmpname = sys::tempnam ("", "oct-");
212 
213  if (tmpname.empty ())
214  {
215  (*current_liboctave_warning_handler)
216  ("fseek/ftell bug check failed (tmp name creation)!");
217  return false;
218  }
219 
220  std::FILE *fptr = std::fopen (tmpname.c_str (), "wb");
221 
222  if (! fptr)
223  {
224  (*current_liboctave_warning_handler)
225  ("fseek/ftell bug check failed (opening tmp file for writing)!");
226  return false;
227  }
228 
229  fprintf (fptr, "%s", "foo\nbar\nbaz\n");
230 
231  std::fclose (fptr);
232 
233  fptr = std::fopen (tmpname.c_str (), "rt");
234 
235  if (! fptr)
236  {
237  (*current_liboctave_warning_handler)
238  ("fseek/ftell bug check failed (opening tmp file for reading)!");
239  return false;
240  }
241 
242  unwind_action act ([=] ()
243  {
244  std::fclose (fptr);
245  sys::unlink (tmpname);
246  });
247 
248  if (set_nonbuffered_mode)
249  ::setvbuf (fptr, nullptr, _IONBF, 0);
250 
251  while (true)
252  {
253  int c = fgetc (fptr);
254 
255  if (c == EOF)
256  {
257  (*current_liboctave_warning_handler)
258  ("fseek/ftell bug check failed (skipping first line)!");
259  return false;
260  }
261 
262  if (c == '\n')
263  break;
264  }
265 
266  off_t pos = octave_ftello_wrapper (fptr);
267 
268  char buf1[8];
269  int i = 0;
270  while (true)
271  {
272  int c = fgetc (fptr);
273 
274  if (c == EOF)
275  {
276  (*current_liboctave_warning_handler)
277  ("fseek/ftell bug check failed (reading second line)!");
278  return false;
279  }
280 
281  if (c == '\n')
282  break;
283 
284  buf1[i++] = static_cast<char> (c);
285  }
286  buf1[i] = '\0';
287 
289 
290  char buf2[8];
291  i = 0;
292  while (true)
293  {
294  int c = fgetc (fptr);
295 
296  if (c == EOF)
297  {
298  (*current_liboctave_warning_handler)
299  ("fseek/ftell bug check failed (reading after repositioning)!");
300  return false;
301  }
302 
303  if (c == '\n')
304  break;
305 
306  buf2[i++] = static_cast<char> (c);
307  }
308  buf2[i] = '\0';
309 
310  return strcmp (buf1, buf2);
311 }
312 
313 static std::string
314 get_formatted_last_error ()
315 {
316  std::string msg = "";
317 
318  DWORD last_error = GetLastError ();
319 
320  wchar_t *error_text = nullptr;
321  FormatMessageW (FORMAT_MESSAGE_FROM_SYSTEM |
322  FORMAT_MESSAGE_ALLOCATE_BUFFER |
323  FORMAT_MESSAGE_IGNORE_INSERTS,
324  nullptr, last_error,
325  MAKELANGID (LANG_NEUTRAL, SUBLANG_DEFAULT),
326  reinterpret_cast <wchar_t *> (&error_text), 0, nullptr);
327 
328  if (error_text != nullptr)
329  {
330  msg = u8_from_wstring (error_text);
331  LocalFree (error_text);
332  }
333  else
334  msg = "Unknown error.";
335 
336  return msg;
337 }
338 #endif
339 
340 bool
341 file_exists (const std::string& filename, bool is_dir)
342 {
343  // Check if a file with the given name exists on the file system. If is_dir
344  // is true (the default), also return true if filename refers to a directory.
345 #if defined (OCTAVE_USE_WINDOWS_API)
346  std::wstring w_fn = u8_to_wstring (filename);
347 
348  DWORD f_attr = GetFileAttributesW (w_fn.c_str ());
349 
350  return ((f_attr != INVALID_FILE_ATTRIBUTES)
351  && (is_dir || ! (f_attr & FILE_ATTRIBUTE_DIRECTORY)));
352 
353 #else
354  file_stat fs (filename);
355 
356  return (fs && (fs.is_reg () || (is_dir && fs.is_dir ())));
357 
358 #endif
359 }
360 
361 bool
362 file_exists (const std::string& filename, bool is_dir, std::string& msg)
363 {
364  // Check if a file with the given name exists on the file system. If is_dir
365  // is true (the default), also return true if filename refers to a directory.
366 #if defined (OCTAVE_USE_WINDOWS_API)
367  std::wstring w_fn = u8_to_wstring (filename);
368 
369  DWORD f_attr = GetFileAttributesW (w_fn.c_str ());
370 
371  if (f_attr == INVALID_FILE_ATTRIBUTES)
372  msg = get_formatted_last_error ();
373 
374  return ((f_attr != INVALID_FILE_ATTRIBUTES)
375  && (is_dir || ! (f_attr & FILE_ATTRIBUTE_DIRECTORY)));
376 
377 #else
378  file_stat fs (filename);
379 
380  if (! fs)
381  msg = fs.error ();
382 
383  return (fs && (fs.is_reg () || (is_dir && fs.is_dir ())));
384 
385 #endif
386 }
387 
388 bool
389 dir_exists (const std::string& dirname)
390 {
391  // Check if a directory with the given name exists on the file system.
392 #if defined (OCTAVE_USE_WINDOWS_API)
393  std::wstring w_dn = u8_to_wstring (dirname);
394 
395  DWORD f_attr = GetFileAttributesW (w_dn.c_str ());
396 
397  return ((f_attr != INVALID_FILE_ATTRIBUTES)
398  && (f_attr & FILE_ATTRIBUTE_DIRECTORY));
399 
400 #else
401  file_stat fs (dirname);
402 
403  return (fs && fs.is_dir ());
404 
405 #endif
406 }
407 
408 bool
409 dir_exists (const std::string& dirname, std::string& msg)
410 {
411  // Check if a directory with the given name exists on the file system.
412 #if defined (OCTAVE_USE_WINDOWS_API)
413  std::wstring w_dn = u8_to_wstring (dirname);
414 
415  DWORD f_attr = GetFileAttributesW (w_dn.c_str ());
416 
417  if (f_attr == INVALID_FILE_ATTRIBUTES)
418  msg = get_formatted_last_error ();
419 
420  return ((f_attr != INVALID_FILE_ATTRIBUTES)
421  && (f_attr & FILE_ATTRIBUTE_DIRECTORY));
422 
423 #else
424  file_stat fs (dirname);
425 
426  if (! fs)
427  msg = fs.error ();
428 
429  return (fs && fs.is_dir ());
430 
431 #endif
432 }
433 
434 // Return TRUE if FILE1 and FILE2 refer to the same (physical) file.
435 
436 bool
437 same_file (const std::string& file1, const std::string& file2)
438 {
439 #if defined (OCTAVE_USE_WINDOWS_API)
440 
441  // FIXME: When Octave switches to C++17, consider replacing this function
442  // by https://en.cppreference.com/w/cpp/filesystem/equivalent.
443 
444  bool retval = false;
445 
446  std::wstring file1w = sys::u8_to_wstring (file1);
447  std::wstring file2w = sys::u8_to_wstring (file2);
448  const wchar_t *f1 = file1w.c_str ();
449  const wchar_t *f2 = file2w.c_str ();
450 
451  bool f1_is_dir = GetFileAttributesW (f1) & FILE_ATTRIBUTE_DIRECTORY;
452  bool f2_is_dir = GetFileAttributesW (f2) & FILE_ATTRIBUTE_DIRECTORY;
453 
454  // Windows native code
455  // Reference: http://msdn2.microsoft.com/en-us/library/aa363788.aspx
456 
457  DWORD share = FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE;
458 
459  HANDLE hfile1
460  = CreateFileW (f1, 0, share, 0, OPEN_EXISTING,
461  f1_is_dir ? FILE_FLAG_BACKUP_SEMANTICS : 0, 0);
462 
463  if (hfile1 != INVALID_HANDLE_VALUE)
464  {
465  HANDLE hfile2
466  = CreateFileW (f2, 0, share, 0, OPEN_EXISTING,
467  f2_is_dir ? FILE_FLAG_BACKUP_SEMANTICS : 0, 0);
468 
469  if (hfile2 != INVALID_HANDLE_VALUE)
470  {
471  BY_HANDLE_FILE_INFORMATION hfi1;
472  BY_HANDLE_FILE_INFORMATION hfi2;
473 
474  if (GetFileInformationByHandle (hfile1, &hfi1)
475  && GetFileInformationByHandle (hfile2, &hfi2))
476  {
477  retval = (hfi1.dwVolumeSerialNumber == hfi2.dwVolumeSerialNumber
478  && hfi1.nFileIndexHigh == hfi2.nFileIndexHigh
479  && hfi1.nFileIndexLow == hfi2.nFileIndexLow
480  && hfi1.nFileSizeHigh == hfi2.nFileSizeHigh
481  && hfi1.nFileSizeLow == hfi2.nFileSizeLow
482  && hfi1.ftLastWriteTime.dwLowDateTime
483  == hfi2.ftLastWriteTime.dwLowDateTime
484  && hfi1.ftLastWriteTime.dwHighDateTime
485  == hfi2.ftLastWriteTime.dwHighDateTime);
486  }
487 
488  CloseHandle (hfile2);
489  }
490 
491  CloseHandle (hfile1);
492  }
493 
494  return retval;
495 
496 #else
497 
498  // POSIX Code
499 
500  sys::file_stat fs_file1 (file1);
501  sys::file_stat fs_file2 (file2);
502 
503  return (fs_file1 && fs_file2
504  && fs_file1.ino () == fs_file2.ino ()
505  && fs_file1.dev () == fs_file2.dev ());
506 
507 #endif
508 }
509 
// Open FILENAME with the given stdio MODE string and return the stream,
// or a null pointer if the file could not be opened.
//
// With the Windows API, FILENAME and MODE are converted from UTF-8 to
// wide strings and passed to _wfopen.  For text-mode streams (MODE
// contains 't'), unbuffered mode may additionally be selected; see the
// comments in the function body.
std::FILE *
fopen (const std::string& filename, const std::string& mode)
{
#if defined (OCTAVE_USE_WINDOWS_API)

  std::wstring wfilename = u8_to_wstring (filename);
  std::wstring wmode = u8_to_wstring (mode);

  std::FILE *fptr = _wfopen (wfilename.c_str (), wmode.c_str ());

  // The result of the check is cached so that it runs at most once,
  // the first time a file is opened in text mode.
  static bool fseek_ftell_bug_workaround_needed = false;
  static bool fseek_ftell_bug_checked = false;

  const bool text_mode = (mode.find ('t') != std::string::npos);

  if (! fseek_ftell_bug_checked && text_mode)
    {
      // FIXME: Is the following workaround needed for all files
      // opened in text mode, or only for files opened for reading?

      // Try to avoid fseek/ftell bug on Windows systems by setting
      // non-buffered input mode for files opened in text mode, but
      // only if it appears that the workaround is needed.  See
      // Octave bug #58055.

      // check_fseek_ftell_workaround_needed writes a small tmp file
      // with LF line endings, reads its second line, seeks back to the
      // start of that line, and reads it again.  If the two reads
      // differ, the workaround is needed.
      //
      // To verify that the workaround solves the problem, the test is
      // repeated with non-buffered input mode.  If that fails, warn
      // that there may be trouble with ftell/fseek when reading files
      // opened in text mode.

      if (check_fseek_ftell_workaround_needed (false))
        {
          if (check_fseek_ftell_workaround_needed (true))
            (*current_liboctave_warning_handler)
              ("fseek/ftell may fail for files opened in text mode");
          else
            fseek_ftell_bug_workaround_needed = true;
        }

      fseek_ftell_bug_checked = true;
    }

  // Only touch the stream if the open succeeded; passing a null
  // stream to setvbuf is invalid.
  if (fptr && fseek_ftell_bug_workaround_needed && text_mode)
    ::setvbuf (fptr, nullptr, _IONBF, 0);

  return fptr;

#else
  return std::fopen (filename.c_str (), mode.c_str ());
#endif
}
581 
582 std::FILE *
583 fopen_tmp (const std::string& name, const std::string& mode)
584 {
585 #if defined (OCTAVE_USE_WINDOWS_API)
586 
587  // Append "D" to the mode string to indicate that this is a temporary
588  // file that should be deleted when the last open handle is closed.
589  std::string tmp_mode = mode + "D";
590 
591  return std::fopen (name.c_str (), tmp_mode.c_str ());
592 
593 #else
594 
595  std::FILE *fptr = std::fopen (name.c_str (), mode.c_str ());
596 
597  // From gnulib: This relies on the Unix semantics that a file is not
598  // really removed until it is closed.
599  octave_unlink_wrapper (name.c_str ());
600 
601  return fptr;
602 
603 #endif
604 }
605 
// Construct and return a std::fstream for FILENAME opened with MODE.
// With the Windows API, FILENAME is converted from UTF-8 to a wide
// string before it is passed to the stream constructor.
std::fstream
fstream (const std::string& filename, const std::ios::openmode mode)
{
#if defined (OCTAVE_USE_WINDOWS_API)

  std::wstring wfilename = u8_to_wstring (filename);

  return std::fstream (wfilename.c_str (), mode);

#else
  return std::fstream (filename.c_str (), mode);
#endif
}
619 
// Construct and return a std::ifstream for FILENAME opened with MODE.
// With the Windows API, FILENAME is converted from UTF-8 to a wide
// string before it is passed to the stream constructor.
std::ifstream
ifstream (const std::string& filename, const std::ios::openmode mode)
{
#if defined (OCTAVE_USE_WINDOWS_API)

  std::wstring wfilename = u8_to_wstring (filename);

  return std::ifstream (wfilename.c_str (), mode);

#else
  return std::ifstream (filename.c_str (), mode);
#endif
}
633 
// Construct and return a std::ofstream for FILENAME opened with MODE.
// With the Windows API, FILENAME is converted from UTF-8 to a wide
// string before it is passed to the stream constructor.
std::ofstream
ofstream (const std::string& filename, const std::ios::openmode mode)
{
#if defined (OCTAVE_USE_WINDOWS_API)

  std::wstring wfilename = u8_to_wstring (filename);

  return std::ofstream (wfilename.c_str (), mode);

#else
  return std::ofstream (filename.c_str (), mode);
#endif
}
647 
648 void
649 putenv_wrapper (const std::string& name, const std::string& value)
650 {
651  std::string new_env = name + "=" + value;
652 
653  // FIXME: The malloc leaks memory, but so would a call to setenv.
654  // Short of extreme measures to track memory, altering the environment
655  // always leaks memory, but the saving grace is that the leaks are small.
656 
657  // As far as I can see there's no way to distinguish between the
658  // various errors; putenv doesn't have errno values.
659 
660 #if defined (OCTAVE_USE_WINDOWS_API)
661  std::wstring new_wenv = u8_to_wstring (new_env);
662 
663  int len = (new_wenv.length () + 1) * sizeof (wchar_t);
664 
665  wchar_t *new_item = static_cast<wchar_t *> (std::malloc (len));
666 
667  wcscpy (new_item, new_wenv.c_str());
668 
669  if (_wputenv (new_item) < 0)
670  (*current_liboctave_error_handler)
671  ("putenv (%s) failed", new_env.c_str());
672 #else
673  int len = new_env.length () + 1;
674 
675  char *new_item = static_cast<char *> (std::malloc (len));
676 
677  std::strcpy (new_item, new_env.c_str());
678 
679  if (octave_putenv_wrapper (new_item) < 0)
680  (*current_liboctave_error_handler) ("putenv (%s) failed", new_item);
681 #endif
682 }
683 
// Return the value of the environment variable NAME, or an empty
// string if the lookup yields a null pointer.
std::string
getenv_wrapper (const std::string& name)
{
#if defined (OCTAVE_USE_WINDOWS_API)
  const std::wstring wide_name = u8_to_wstring (name);

  const wchar_t *value = _wgetenv (wide_name.c_str ());

  if (! value)
    return std::string ();

  return u8_from_wstring (value);
#else
  const char *value = ::getenv (name.c_str ());

  if (! value)
    return std::string ();

  return std::string (value);
#endif
}
696 
697 int
698 unsetenv_wrapper (const std::string& name)
699 {
700 #if defined (OCTAVE_USE_WINDOWS_API)
701  putenv_wrapper (name, "");
702 
703  std::wstring wname = u8_to_wstring (name);
704  return (SetEnvironmentVariableW (wname.c_str (), nullptr) ? 0 : -1);
705 #else
706  return octave_unsetenv_wrapper (name.c_str ());
707 #endif
708 }
709 
// Report whether looking up the environment variable NAME yields a
// non-null value.
bool
isenv_wrapper (const std::string& name)
{
#if defined (OCTAVE_USE_WINDOWS_API)
  const std::wstring wide_name = u8_to_wstring (name);

  return _wgetenv (wide_name.c_str ()) != nullptr;
#else
  return ::getenv (name.c_str ()) != nullptr;
#endif
}
721 
// Convert the multibyte UTF-8 string UTF8_STRING to a wide character
// string.  If the converter throws std::range_error, an empty string
// is returned.
std::wstring
u8_to_wstring (const std::string& utf8_string)
{
  // One converter object is shared by all calls.
  static std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>
    converter;

  try
    {
      return converter.from_bytes (utf8_string);
    }
  catch (const std::range_error&)
    {
      // What to do in case of error?
      // error ("u8_to_wstring: converting from UTF-8 to wchar_t: %s",
      //        e.what ());
    }

  return std::wstring ();
}
744 
// Convert the wide character string WCHAR_STRING to a multibyte UTF-8
// string.  If the converter throws std::range_error, an empty string
// is returned.
std::string
u8_from_wstring (const std::wstring& wchar_string)
{
  // One converter object is shared by all calls.
  static std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>
    converter;

  try
    {
      return converter.to_bytes (wchar_string);
    }
  catch (const std::range_error&)
    {
      // What to do in case of error?
      // error ("u8_from_wstring: converting from wchar_t to UTF-8: %s",
      //        e.what ());
    }

  return std::string ();
}
767 
768 // At quite a few places in the code we are passing file names as
769 // char arrays to external library functions.
770 
771 // When these functions try to locate the corresponding file on the
772 // disc, they need to use the wide character API on Windows to
773 // correctly open files with non-ASCII characters.
774 
775 // But they have no way of knowing which encoding we are using for
776 // the passed string. So they have no way of reliably converting to
777 // a wchar_t array. (I.e. there is no possible fix for these
778 // functions with current C or C++.)
779 
780 // To solve the dilemma, the function "get_ASCII_filename" first
781 // checks whether there are any non-ASCII characters in the passed
782 // file name. If there are not, it returns the original name.
783 
784 // Otherwise, it optionally tries to convert the file name to the locale
785 // charset.
786 
787 // If the file name contains characters that cannot be converted to the
788 // locale charset (or that step is skipped), it tries to obtain the short
789 // file name (8.3 naming scheme) which only consists of ASCII characters
790 // and are safe to pass. However, short file names can be disabled for
791 // performance reasons on the file system level with NTFS and they are not
792 // stored on other file systems (e.g. ExFAT). So there is no guarantee
793 // that these exist.
794 
795 // If short file names are not stored, a hard link to the file is
796 // created. For this the path to the file is split at the deepest
797 // possible level that doesn't contain non-ASCII characters. At
798 // that level a hidden folder is created that holds the hard links.
799 // That means we need to have write access on that location. A path
800 // to that hard link is returned.
801 
802 // If the file system is FAT32, there are no hard links. But FAT32
803 // always stores short file names. So we are safe.
804 
// exFAT, which is occasionally used on USB sticks and SD cards, neither
// stores short file names nor supports hard links.  So for exFAT there
// is (currently) no way for this function to generate a file name that
// is free of non-ASCII characters but still valid.
810 
811 // For Unixy systems, this function does nothing.
812 
813 std::string
814 get_ASCII_filename (const std::string& orig_file_name,
815  const bool allow_locale)
816 {
817 #if defined (OCTAVE_USE_WINDOWS_API)
818 
819  // Return file name that only contains ASCII characters that can
820  // be used to access the file orig_file_name. The original file
821  // must exist in the file system before calling this function.
822  // This is useful for passing file names to functions that are not
823  // aware of the character encoding we are using.
824 
825  // 0. Check whether filename contains non-ASCII (UTF-8) characters.
826 
827  std::string::const_iterator first_non_ASCII
828  = std::find_if (orig_file_name.begin (), orig_file_name.end (),
829  [](char c) { return (c < 0 || c >= 128); });
830 
831  if (first_non_ASCII == orig_file_name.end ())
832  return orig_file_name;
833 
834  // 1. Optionally, check if all characters in the path can be successfully
835  // converted to the locale charset
836  if (allow_locale)
837  {
838  const char *locale = octave_locale_charset_wrapper ();
839  if (locale)
840  {
841  const uint8_t *name_u8 = reinterpret_cast<const uint8_t *>
842  (orig_file_name.c_str ());
843  std::size_t length = 0;
844  char *name_locale = octave_u8_conv_to_encoding_strict
845  (locale, name_u8,
846  orig_file_name.length () + 1, &length);
847  if (name_locale)
848  {
849  std::string file_name_locale (name_locale, length);
850  free (name_locale);
851  return file_name_locale;
852  }
853  }
854  }
855 
856  // 2. Check if file system stores short filenames (might be ASCII-only).
857 
858  std::wstring w_orig_file_name_str = u8_to_wstring (orig_file_name);
859  const wchar_t *w_orig_file_name = w_orig_file_name_str.c_str ();
860 
861  // Get full path to file
862  wchar_t w_full_file_name[_MAX_PATH];
863  if (_wfullpath (w_full_file_name, w_orig_file_name, _MAX_PATH) == nullptr)
864  return orig_file_name;
865 
866  std::wstring w_full_file_name_str = w_full_file_name;
867 
868  // Get short filename (8.3) from UTF-16 filename.
869 
870  long length = GetShortPathNameW (w_full_file_name, nullptr, 0);
871 
872  if (length > 0)
873  {
874  // Dynamically allocate the correct size (terminating null char
875  // was included in length).
876 
877  OCTAVE_LOCAL_BUFFER (wchar_t, w_short_file_name, length);
878  GetShortPathNameW (w_full_file_name, w_short_file_name, length);
879 
880  std::wstring w_short_file_name_str
881  = std::wstring (w_short_file_name, length);
882 
883  if (w_short_file_name_str.compare (0, length-1, w_full_file_name_str) != 0)
884  {
885  // Check whether short file name contains non-ASCII characters
886  std::string short_file_name
887  = u8_from_wstring (w_short_file_name_str);
888  first_non_ASCII
889  = std::find_if (short_file_name.begin (),
890  short_file_name.end (),
891  [](char c) { return (c < 0 || c >= 128); });
892  if (first_non_ASCII == short_file_name.end ())
893  return short_file_name;
894  }
895  }
896 
897  // 3. Create hard link with only-ASCII characters.
898  // Get longest possible part of path that only contains ASCII chars.
899 
900  std::wstring::iterator w_first_non_ASCII
901  = std::find_if (w_full_file_name_str.begin (), w_full_file_name_str.end (),
902  [](wchar_t c) { return (c < 0 || c >= 128); });
903  std::wstring tmp_substr
904  = std::wstring (w_full_file_name_str.begin (), w_first_non_ASCII);
905 
906  std::size_t pos
907  = tmp_substr.find_last_of (u8_to_wstring (file_ops::dir_sep_chars ()));
908 
909  std::string par_dir
910  = u8_from_wstring (w_full_file_name_str.substr (0, pos+1));
911 
912  // Create .oct_ascii directory.
913  // FIXME: We need to have write permission in this location.
914 
915  std::string oct_ascii_dir = par_dir + ".oct_ascii";
916  std::string test_dir = canonicalize_file_name (oct_ascii_dir);
917 
918  if (test_dir.empty ())
919  {
920  std::string msg;
921  int status = sys::mkdir (oct_ascii_dir, 0777, msg);
922 
923  if (status < 0)
924  return orig_file_name;
925 
926  // Set hidden property.
927  SetFileAttributesA (oct_ascii_dir.c_str (), FILE_ATTRIBUTE_HIDDEN);
928  }
929 
930  // Create file from hash of full filename.
931  std::string filename_hash
932  = (oct_ascii_dir + file_ops::dir_sep_str ()
933  + crypto::hash ("SHA1", orig_file_name));
934 
935  // FIXME: This is just to check if the file exists. Use a more efficient
936  // method.
937  std::string abs_filename_hash = canonicalize_file_name (filename_hash);
938 
939  if (! abs_filename_hash.empty ())
940  sys::unlink (filename_hash);
941 
942  // At this point, we know that we have only ASCII characters.
943  // So instead of converting, just copy the characters to std::wstring.
944  std::wstring w_filename_hash (filename_hash.begin (),
945  filename_hash.end ());
946 
947  if (CreateHardLinkW (w_filename_hash.c_str (), w_orig_file_name, nullptr))
948  return filename_hash;
949 
950 #else
951 
952  octave_unused_parameter (allow_locale);
953 
954 #endif
955 
956  return orig_file_name;
957 }
958 
959 OCTAVE_END_NAMESPACE(sys)
960 OCTAVE_END_NAMESPACE(octave)
#define SEEK_SET
bool is_dir() const
Definition: file-stat.cc:65
std::string error() const
Definition: file-stat.h:149
bool is_reg() const
Definition: file-stat.cc:83
Definition: dir-ops.h:42
string_vector read()
Definition: dir-ops.cc:91
std::string error() const
Definition: dir-ops.h:88
bool close()
Definition: dir-ops.cc:130
Definition: oct-env.h:40
OCTAVE_BEGIN_NAMESPACE(octave) static octave_value daspk_fcn
std::string dir_sep_str()
std::string dir_sep_chars()
std::string dirname(const std::string &path)
int octave_fseeko_wrapper(FILE *fp, off_t offset, int whence)
off_t octave_ftello_wrapper(FILE *fp)
std::string hash(hash_fptr hash_fcn, const std::string &str, int result_buf_len)
Definition: lo-hash.cc:45
FloatComplex(* fptr)(const FloatComplex &, float, int, octave_idx_type &)
Definition: lo-specfun.cc:1102
int system(const std::string &cmd_str)
Definition: lo-sysdep.cc:60
bool dir_exists(const std::string &dirname)
Definition: lo-sysdep.cc:389
std::string u8_from_wstring(const std::wstring &wchar_string)
Definition: lo-sysdep.cc:746
std::string get_ASCII_filename(const std::string &orig_file_name, const bool allow_locale)
Definition: lo-sysdep.cc:814
std::fstream fstream(const std::string &filename, const std::ios::openmode mode)
Definition: lo-sysdep.cc:607
std::FILE * fopen(const std::string &filename, const std::string &mode)
Definition: lo-sysdep.cc:511
std::string getenv_wrapper(const std::string &name)
Definition: lo-sysdep.cc:685
bool file_exists(const std::string &filename, bool is_dir)
Definition: lo-sysdep.cc:341
bool isenv_wrapper(const std::string &name)
Definition: lo-sysdep.cc:711
std::ofstream ofstream(const std::string &filename, const std::ios::openmode mode)
Definition: lo-sysdep.cc:635
int chdir(const std::string &path_arg)
Definition: lo-sysdep.cc:107
std::string getcwd()
Definition: lo-sysdep.cc:72
std::ifstream ifstream(const std::string &filename, const std::ios::openmode mode)
Definition: lo-sysdep.cc:621
std::wstring u8_to_wstring(const std::string &utf8_string)
Definition: lo-sysdep.cc:723
void putenv_wrapper(const std::string &name, const std::string &value)
Definition: lo-sysdep.cc:649
bool get_dirlist(const std::string &dirname, string_vector &dirlist, std::string &msg)
Definition: lo-sysdep.cc:120
bool same_file(const std::string &file1, const std::string &file2)
Definition: lo-sysdep.cc:437
int unsetenv_wrapper(const std::string &name)
Definition: lo-sysdep.cc:698
std::FILE * fopen_tmp(const std::string &name, const std::string &mode)
Definition: lo-sysdep.cc:583
const char * octave_locale_charset_wrapper(void)
std::string tilde_expand(const std::string &name)
Definition: file-ops.cc:289
std::string tempnam(const std::string &dir, const std::string &pfx)
Definition: file-ops.cc:749
int mkdir(const std::string &nm, mode_t md)
Definition: file-ops.cc:413
std::string canonicalize_file_name(const std::string &name)
Definition: file-ops.cc:798
int unlink(const std::string &name)
Definition: file-ops.cc:727
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:44
bool strcmp(const T &str_a, const T &str_b)
Octave string utility functions.
void * malloc(unsigned)
void free(void *)
int octave_putenv_wrapper(char *str)
char * octave_u8_conv_to_encoding_strict(const char *tocode, const uint8_t *src, size_t srclen, size_t *lengthp)
int octave_unlink_wrapper(const char *nm)
int octave_chdir_wrapper(const char *nm)
char * octave_getcwd_wrapper(char *nm, size_t len)
int octave_unsetenv_wrapper(const char *name)
F77_RET_T len
Definition: xerbla.cc:61