New src/java.base/windows/native/libjava/canonicalize

   1 /*
   2  * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /*
  27  * Pathname canonicalization for Win32 file systems
  28  */
  29 
  30 #include <stdio.h>
  31 #include <stdlib.h>
  32 #include <string.h>
  33 #include <ctype.h>
  34 #include <assert.h>
  35 #include <sys/stat.h>
  36 
  37 #include <windows.h>
  38 #include <winbase.h>
  39 #include <errno.h>
  40 
  41 /* We should also include jdk_util.h here, for the prototype of JDK_Canonicalize.
  42    This isn't possible though because canonicalize_md.c is as well used in
  43    different contexts within Oracle.
  44  */
  45 #include "io_util_md.h"
  46 
  47 /* Copy bytes to dst, not going past dend; return dst + number of bytes copied,
  48    or NULL if dend would have been exceeded.  If first != '\0', copy that byte
  49    before copying bytes from src to send - 1. */
  50 static WCHAR*
  51 wcp(WCHAR *dst, WCHAR *dend, WCHAR first, WCHAR *src, WCHAR *send)
  52 {
  53     WCHAR *p = src, *q = dst;
  54     if (first != L'\0') {
  55         if (q < dend) {
  56             *q++ = first;
  57         } else {
  58             errno = ENAMETOOLONG;
  59             return NULL;
  60         }
  61     }
  62     if (send - p > dend - q) {
  63         errno = ENAMETOOLONG;
  64         return NULL;
  65     }
  66     while (p < send)
  67         *q++ = *p++;
  68     return q;
  69 }
  70 
  71 /* Find first instance of '\\' at or following start.  Return the address of
  72    that byte or the address of the null terminator if '\\' is not found. */
  73 static WCHAR *
  74 wnextsep(WCHAR *start)
  75 {
  76     WCHAR *p = start;
  77     int c;
  78     while ((c = *p) && (c != L'\\'))
  79         p++;
  80     return p;
  81 }
  82 
  83 /* Tell whether the given string contains any wildcard characters */
  84 static int
  85 wwild(WCHAR *start)
  86 {
  87     WCHAR *p = start;
  88     int c;
  89     while (c = *p) {
  90         if ((c == L'*') || (c == L'?'))
  91             return 1;
  92         p++;
  93     }
  94     return 0;
  95 }
  96 
  97 /* Tell whether the given string contains prohibited combinations of dots.
  98    In the canonicalized form no path element may have dots at its end.
  99    Allowed canonical paths: c:\xa...dksd\..ksa\.lk    c:\...a\.b\cd..x.x
 100    Prohibited canonical paths: c:\..\x  c:\x.\d c:\...
 101 */
 102 static int
 103 wdots(WCHAR *start)
 104 {
 105     WCHAR *p = start;
 106     // Skip "\\.\" prefix
 107     if (wcslen(p) > 4 && !wcsncmp(p, L"\\\\.\\", 4))
 108         p = p + 4;
 109 
 110     while (*p) {
 111         if ((p = wcschr(p, L'.')) == NULL) // find next occurrence of '.'
 112             return 0; // no more dots
 113         p++; // next char
 114         while ((*p) == L'.') // go to the end of dots
 115             p++;
 116         if (*p && (*p != L'\\')) // path element does not end with a dot
 117             p++; // go to the next char
 118         else
 119             return 1; // path element does end with a dot - prohibited
 120     }
 121     return 0; // no prohibited combinations of dots found
 122 }
 123 
 124 /* If the lookup of a particular prefix fails because the file does not exist,
 125    because it is of the wrong type, because access is denied, or because the
 126    network is unreachable then canonicalization does not fail, it terminates
 127    successfully after copying the rest of the original path to the result path.
 128    Other I/O errors cause an error return.
 129 */
 130 int
 131 lastErrorReportable()
 132 {
 133     DWORD errval = GetLastError();
 134     if ((errval == ERROR_FILE_NOT_FOUND)
 135         || (errval == ERROR_DIRECTORY)
 136         || (errval == ERROR_PATH_NOT_FOUND)
 137         || (errval == ERROR_BAD_NETPATH)
 138         || (errval == ERROR_BAD_NET_NAME)
 139         || (errval == ERROR_ACCESS_DENIED)
 140         || (errval == ERROR_NETWORK_UNREACHABLE)
 141         || (errval == ERROR_NETWORK_ACCESS_DENIED)) {
 142         return 0;
 143     }
 144     return 1;
 145 }
 146 
 147 /* Convert a pathname to canonical form.  The input orig_path is assumed to
 148    have been converted to native form already, via JVM_NativePath().  This is
 149    necessary because _fullpath() rejects duplicate separator characters on
 150    Win95, though it accepts them on NT. */
 151 int
 152 wcanonicalize(WCHAR *orig_path, WCHAR *result, int size)
 153 {
 154     WIN32_FIND_DATAW fd;
 155     HANDLE h;
 156     WCHAR *path;    /* Working copy of path */
 157     WCHAR *src, *dst, *dend, c;
 158 
 159     /* Reject paths that contain wildcards */
 160     if (wwild(orig_path)) {
 161         errno = EINVAL;
 162         return -1;
 163     }
 164 
 165     if ((path = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL)
 166         return -1;
 167 
 168     /* Collapse instances of "foo\.." and ensure absoluteness.  Note that
 169        contrary to the documentation, the _fullpath procedure does not require
 170        the drive to be available.  */
 171     if(!_wfullpath(path, orig_path, size)) {
 172         goto err;
 173     }
 174 
 175     if (wdots(path)) /* Check for prohibited combinations of dots */
 176         goto err;
 177 
 178     src = path;            /* Start scanning here */
 179     dst = result;        /* Place results here */
 180     dend = dst + size;        /* Don't go to or past here */
 181 
 182     /* Copy prefix, assuming path is absolute */
 183     c = src[0];
 184     if (((c <= L'z' && c >= L'a') || (c <= L'Z' && c >= L'A'))
 185        && (src[1] == L':') && (src[2] == L'\\')) {
 186         /* Drive specifier */
 187         *src = towupper(*src);    /* Canonicalize drive letter */
 188         if (!(dst = wcp(dst, dend, L'\0', src, src + 2))) {
 189             goto err;
 190         }
 191 
 192         src += 2;
 193     } else if ((src[0] == L'\\') && (src[1] == L'\\')) {
 194         /* UNC pathname */
 195         WCHAR *p;
 196         p = wnextsep(src + 2);    /* Skip past host name */
 197         if (!*p) {
 198             /* A UNC pathname must begin with "\\\\host\\share",
 199                so reject this path as invalid if there is no share name */
 200             errno = EINVAL;
 201             goto err;
 202         }
 203         p = wnextsep(p + 1);    /* Skip past share name */
 204         if (!(dst = wcp(dst, dend, L'\0', src, p)))
 205             goto err;
 206         src = p;
 207     } else {
 208         /* Invalid path */
 209         errno = EINVAL;
 210         goto err;
 211     }
 212     /* At this point we have copied either a drive specifier ("z:") or a UNC
 213        prefix ("\\\\host\\share") to the result buffer, and src points to the
 214        first byte of the remainder of the path.  We now scan through the rest
 215        of the path, looking up each prefix in order to find the true name of
 216        the last element of each prefix, thereby computing the full true name of
 217        the original path. */
 218     while (*src) {
 219         WCHAR *p = wnextsep(src + 1);    /* Find next separator */
 220         WCHAR c = *p;
 221         WCHAR *pathbuf;
 222         int pathlen;
 223 
 224         assert(*src == L'\\');        /* Invariant */
 225         *p = L'\0';            /* Temporarily clear separator */
 226 
 227         if ((pathlen = (int)wcslen(path)) > MAX_PATH - 1) {
 228             pathbuf = getPrefixed(path, pathlen);
 229             h = FindFirstFileW(pathbuf, &fd);    /* Look up prefix */
 230             free(pathbuf);
 231         } else
 232             h = FindFirstFileW(path, &fd);    /* Look up prefix */
 233 
 234         *p = c;                /* Restore separator */
 235         if (h != INVALID_HANDLE_VALUE) {
 236             /* Lookup succeeded; append true name to result and continue */
 237             FindClose(h);
 238             if (!(dst = wcp(dst, dend, L'\\', fd.cFileName,
 239                             fd.cFileName + wcslen(fd.cFileName)))){
 240                 goto err;
 241             }
 242             src = p;
 243             continue;
 244         } else {
 245             if (!lastErrorReportable()) {
 246                if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))){
 247                    goto err;
 248                }
 249                 break;
 250             } else {
 251                 goto err;
 252             }
 253         }
 254     }
 255 
 256     if (dst >= dend) {
 257     errno = ENAMETOOLONG;
 258         goto err;
 259     }
 260     *dst = L'\0';
 261     free(path);
 262     return 0;
 263 
 264  err:
 265     free(path);
 266     return -1;
 267 }
 268 
 269 /* Convert a pathname to canonical form.  The input prefix is assumed
 270    to be in canonical form already, and the trailing filename must not
 271    contain any wildcard, dot/double dot, or other "tricky" characters
 272    that are rejected by the canonicalize() routine above.  This
 273    routine is present to allow the canonicalization prefix cache to be
 274    used while still returning canonical names with the correct
 275    capitalization. */
 276 int
 277 wcanonicalizeWithPrefix(WCHAR *canonicalPrefix, WCHAR *pathWithCanonicalPrefix, WCHAR *result, int size)
 278 {
 279     WIN32_FIND_DATAW fd;
 280     HANDLE h;
 281     WCHAR *src, *dst, *dend;
 282     WCHAR *pathbuf;
 283     int pathlen;
 284 
 285     src = pathWithCanonicalPrefix;
 286     dst = result;        /* Place results here */
 287     dend = dst + size;   /* Don't go to or past here */
 288 
 289 
 290     if ((pathlen=(int)wcslen(pathWithCanonicalPrefix)) > MAX_PATH - 1) {
 291         pathbuf = getPrefixed(pathWithCanonicalPrefix, pathlen);
 292         h = FindFirstFileW(pathbuf, &fd);    /* Look up prefix */
 293         free(pathbuf);
 294     } else
 295         h = FindFirstFileW(pathWithCanonicalPrefix, &fd);    /* Look up prefix */
 296     if (h != INVALID_HANDLE_VALUE) {
 297         /* Lookup succeeded; append true name to result and continue */
 298         FindClose(h);
 299         if (!(dst = wcp(dst, dend, L'\0',
 300                         canonicalPrefix,
 301                         canonicalPrefix + wcslen(canonicalPrefix)))) {
 302             return -1;
 303         }
 304         if (!(dst = wcp(dst, dend, L'\\',
 305                         fd.cFileName,
 306                         fd.cFileName + wcslen(fd.cFileName)))) {
 307             return -1;
 308         }
 309     } else {
 310         if (!lastErrorReportable()) {
 311             if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))) {
 312                 return -1;
 313             }
 314         } else {
 315             return -1;
 316         }
 317     }
 318 
 319     if (dst >= dend) {
 320         errno = ENAMETOOLONG;
 321         return -1;
 322     }
 323     *dst = L'\0';
 324     return 0;
 325 }
 326 
 327 /* Non-Wide character version of canonicalize.
 328    Converts to wchar and delegates to wcanonicalize. */
 329 JNIEXPORT int
 330 JDK_Canonicalize(const char *orig, char *out, int len) {
 331     wchar_t* wpath = NULL;
 332     wchar_t* wresult = NULL;
 333     int wpath_len;
 334     int ret = -1;
 335 
 336     /* Get required buffer size to convert to Unicode */
 337     wpath_len = MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS,
 338                                     orig, -1, NULL, 0);
 339     if (wpath_len == 0) {
 340         goto finish;
 341     }
 342 
 343     if ((wpath = (wchar_t*) malloc(sizeof(wchar_t) * wpath_len)) == NULL) {
 344         goto finish;
 345     }
 346 
 347     if (MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS,
 348                             orig, -1, wpath, wpath_len) == 0) {
 349         goto finish;
 350     }
 351 
 352     if ((wresult = (wchar_t*) malloc(sizeof(wchar_t) * len)) == NULL) {
 353         goto finish;
 354     }
 355 
 356     if (wcanonicalize(wpath, wresult, len) != 0) {
 357         goto finish;
 358     }
 359 
 360     if (WideCharToMultiByte(CP_ACP, 0,
 361                             wresult, -1, out, len, NULL, NULL) == 0) {
 362         goto finish;
 363     }
 364 
 365     // Change return value to success.
 366     ret = 0;
 367 
 368 finish:
 369     free(wresult);
 370     free(wpath);
 371 
 372     return ret;
 373 }
 374 
 375 /* The appropriate location of getPrefixed() is io_util_md.c, but it is
 376    also used in a non-OpenJDK context within Oracle. There, canonicalize_md.c
 377    is already pulled in and compiled, so to avoid more complicated solutions
 378    we keep this method here.
 379  */
 380 
 381 /* copy \\?\ or \\?\UNC\ to the front of path */
 382 JNIEXPORT WCHAR*
 383 getPrefixed(const WCHAR* path, int pathlen) {
 384     WCHAR* pathbuf = (WCHAR*)malloc((pathlen + 10) * sizeof (WCHAR));
 385     if (pathbuf != 0) {
 386         if (path[0] == L'\\' && path[1] == L'\\') {
 387             if (path[2] == L'?' && path[3] == L'\\'){
 388                 /* if it already has a \\?\ don't do the prefix */
 389                 wcscpy(pathbuf, path );
 390             } else {
 391                 /* only UNC pathname includes double slashes here */
 392                 wcscpy(pathbuf, L"\\\\?\\UNC\0");
 393                 wcscat(pathbuf, path + 1);
 394             }
 395         } else {
 396             wcscpy(pathbuf, L"\\\\?\\\0");
 397             wcscat(pathbuf, path );
 398         }
 399     }
 400     return pathbuf;
 401 }