[cp-patches] FYI: rewrite File.toCanonicalPath()

Gary Benson gbenson at redhat.com
Wed Jun 7 15:19:24 UTC 2006


Hi all,

I just checked in a rewrite of File.toCanonicalPath() for GNU/Posix
systems to make it correctly handle symbolic links.  It's basically
a JNIified version of the one I committed yesterday for GCJ.  This
fixes PR 24895.

Cheers,
Gary
-------------- next part --------------
Index: ChangeLog
===================================================================
RCS file: /cvsroot/classpath/classpath/ChangeLog,v
retrieving revision 1.7697
diff -u -r1.7697 ChangeLog
--- ChangeLog	7 Jun 2006 14:46:49 -0000	1.7697
+++ ChangeLog	7 Jun 2006 15:08:26 -0000
@@ -1,3 +1,15 @@
+2006-06-07  Gary Benson  <gbenson at redhat.com>
+
+	PR 24895
+	* native/jni/java-io/java_io_VMFile.c
+	(Java_java_io_VMFile_toCanonicalForm): New method.
+	* configure.ac: Added checks for lstat and readlink.
+	* include/java_io_VMFile.h: Added new method.
+	* vm/reference/java/io/VMFile.java: Use new method.
+	* gnu/java/io/PlatformHelper.java (toCanonicalForm): Removed.
+	* NEWS: Documented the above.
+	* java/io/File.java: Javadoc fix.
+
 2006-06-06  Roman Kennke  <kennke at aicas.com>
 
 	PR 27920
Index: configure.ac
===================================================================
RCS file: /cvsroot/classpath/classpath/configure.ac,v
retrieving revision 1.159
diff -u -r1.159 configure.ac
--- configure.ac	6 Jun 2006 10:19:48 -0000	1.159
+++ configure.ac	7 Jun 2006 15:08:27 -0000
@@ -321,6 +321,7 @@
 		  strerror_r \
                   fcntl \
 		  mmap munmap mincore msync madvise getpagesize sysconf \
+		  lstat readlink \
 		  ])
 
   LIBMAGIC=
Index: NEWS
===================================================================
RCS file: /cvsroot/classpath/classpath/NEWS,v
retrieving revision 1.147
diff -u -r1.147 NEWS
--- NEWS	5 Jun 2006 18:37:59 -0000	1.147
+++ NEWS	7 Jun 2006 15:08:28 -0000
@@ -27,6 +27,9 @@
   URLConnection.guessContentTypeFromStream.  The reference
   implementation uses libmagic (and falls back to doing nothing if
   libmagic is not available).
+* The method gnu.java.io.PlatformHelper.toCanonicalForm() has been
+  replaced with a JNI implementation of VMFile.toCanonicalForm() for
+  GNU/Posix systems.
 
 New in release 0.91 (May 15, 2006)
 
Index: native/jni/java-io/java_io_VMFile.c
===================================================================
RCS file: /cvsroot/classpath/classpath/native/jni/java-io/java_io_VMFile.c,v
retrieving revision 1.10
diff -u -r1.10 java_io_VMFile.c
--- native/jni/java-io/java_io_VMFile.c	25 Jan 2006 10:40:12 -0000	1.10
+++ native/jni/java-io/java_io_VMFile.c	7 Jun 2006 15:08:28 -0000
@@ -1,5 +1,5 @@
 /* java_io_VMFile.c - Native methods for java.io.File class
-   Copyright (C) 1998, 2004 Free Software Foundation, Inc.
+   Copyright (C) 1998, 2004, 2006 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -730,3 +730,237 @@
   return (0);
 #endif /* not WITHOUT_FILESYSTEM */
 }
+
+/*************************************************************************/
+
+/*
+ * These two methods are used to maintain dynamically allocated
+ * buffers for getCanonicalPath without the overhead of calling
+ * realloc every time a buffer is modified.  Buffers are sized
+ * at the smallest multiple of CHUNKSIZ that is greater than or
+ * equal to the desired length.  The default CHUNKSIZ is 256,
+ * longer than most paths, so in most cases a getCanonicalPath
+ * will require only one malloc per buffer.
+ */
+
+#define CHUNKLOG 8
+#define CHUNKSIZ (1 << CHUNKLOG)
+
+static int
+nextChunkSize (int size)
+{
+  return ((size >> CHUNKLOG) + ((size & (CHUNKSIZ - 1)) ? 1 : 0)) << CHUNKLOG;
+}
+
+static char *
+maybeGrowBuf (JNIEnv *env, char *buf, int *size, int required)
+{
+  if (required > *size)
+    {
+      *size = nextChunkSize (required);
+      buf = JCL_realloc (env, buf, *size);
+    }
+  return buf;
+}
+
+/*************************************************************************/
+
+/*
+ * This method converts a path to canonical form on GNU/Posix systems.
+ * This involves the removal of redundant separators, references to
+ * "." and "..", and symbolic links.
+ *
+ * The conversion proceeds on a component-by-component basis: symbolic
+ * links and references to ".."  are resolved as and when they occur.
+ * This means that if "/foo/bar" is a symbolic link to "/baz" then the
+ * canonical form of "/foo/bar/.." is "/" and not "/foo".
+ *
+ * In order to mimic the behaviour of proprietary JVMs, non-existant
+ * path components are allowed (a departure from the normal GNU system
+ * convention).  This means that if "/foo/bar" is a symbolic link to
+ * "/baz", the canonical form of "/non-existant-directory/../foo/bar"
+ * is "/baz".
+ *
+ * Class:     java_io_VMFile
+ * Method:    toCanonicalForm
+ * Signature: (Ljava/lang/String)Ljava/lang/String
+ */
+
+JNIEXPORT jstring JNICALL
+Java_java_io_VMFile_toCanonicalForm (JNIEnv *env,
+				     jclass class __attribute__ ((__unused__)),
+				     jstring jpath)
+{
+#ifndef WITHOUT_FILESYSTEM
+  const char *path;
+  char *src, *dst;
+  int srci, dsti;
+  int srcl, dstl;
+  int len;
+  int fschecks;
+#if defined (HAVE_LSTAT) && defined (HAVE_READLINK)
+  struct stat sb;
+#endif /* HAVE_LSTAT && HAVE_READLINK */
+
+  path = JCL_jstring_to_cstring (env, jpath);
+  if (path == NULL)
+    return NULL;
+
+  /* It is the caller's responsibility to ensure the path is absolute. */
+  if (path[0] == 0 || path[0] != '/')
+    {
+      JCL_free_cstring (env, jpath, path);
+      JCL_ThrowException (env, "java/lang/RuntimeException", "Not absolute");
+      return NULL;
+    }
+
+  len = strlen (path);
+  srcl = nextChunkSize (len + 1);
+  src = JCL_malloc (env, srcl);
+  if (src == NULL)
+    {
+      JCL_free_cstring (env, jpath, path);
+      return NULL;
+    }
+  strcpy (src, path);
+  JCL_free_cstring (env, jpath, path);
+  srci = 1;
+
+  dstl = nextChunkSize (2);  
+  dst = JCL_malloc (env, dstl);
+  if (dst == NULL)
+    {
+      JCL_free (env, src);
+      return NULL;
+    }
+  dst[0] = '/';
+  dsti = 1;
+
+  fschecks = JNI_TRUE;
+
+  while (src[srci] != '\0')
+    {
+      int tmpi, dsti_save;
+
+      /* Skip slashes. */
+      while (src[srci] == '/')
+	srci++;
+      tmpi = srci;
+      /* Find next slash. */
+      while (src[srci] != '/' && src[srci] != '\0')
+	srci++;
+      if (srci == tmpi)
+	/* We hit the end. */
+	break;
+      len = srci - tmpi;
+
+      /* Handle "." and "..". */
+      if (len == 1 && src[tmpi] == '.')
+	continue;
+      if (len == 2 && src[tmpi] == '.' && src[tmpi + 1] == '.')
+	{
+	  while (dsti > 1 && dst[dsti - 1] != '/')
+	    dsti--;
+	  if (dsti != 1)
+	    dsti--;
+	  /* Reenable filesystem checking if disabled, as we might
+	   * have reversed over whatever caused the problem before.
+	   * At least one proprietary JVM has inconsistencies because
+	   * it does not do this.
+	   */
+	  fschecks = JNI_TRUE;
+	  continue;
+	}
+
+      /* Handle real path components. */
+      dst = maybeGrowBuf (env,
+			  dst, &dstl, dsti + (dsti > 1 ? 1 : 0) + len + 1);
+      if (dst == NULL)
+	{
+	  JCL_free (env, src);
+	  return NULL;
+	}
+      dsti_save = dsti;
+      if (dsti > 1)
+	dst[dsti++] = '/';
+      strncpy (&dst[dsti], &src[tmpi], len);
+      dsti += len;
+      if (fschecks == JNI_FALSE)
+	continue;
+
+#if defined (HAVE_LSTAT) && defined (HAVE_READLINK)
+      dst[dsti] = '\0';
+      if (lstat (dst, &sb) == 0)
+	{
+	  if (S_ISLNK (sb.st_mode))
+	    {
+	      int tmpl = CHUNKSIZ;
+	      char *tmp = JCL_malloc (env, tmpl);
+	      if (tmp == NULL)
+		{
+		  JCL_free (env, src);
+		  JCL_free (env, dst);
+		  return NULL;
+		}
+
+	      while (1)
+		{
+		  tmpi = readlink (dst, tmp, tmpl);
+		  if (tmpi < 1)
+		    {
+		      JCL_free (env, src);
+		      JCL_free (env, dst);
+		      JCL_free (env, tmp);
+		      JCL_ThrowException (env, "java/io/IOException",
+					  "readlink failed");
+		      return NULL;
+		    }
+		  if (tmpi < tmpl)
+		    break;
+		  tmpl += CHUNKSIZ;
+		  tmp = JCL_realloc (env, tmp, tmpl);
+		}
+
+	      /* Prepend the link's path to src. */
+	      tmp = maybeGrowBuf (env,
+				  tmp, &tmpl, tmpi + strlen (&src[srci]) + 1);
+	      if (tmp == NULL)
+		{
+		  JCL_free (env, src);
+		  JCL_free (env, dst);
+		  return NULL;
+		}
+
+	      strcpy (&tmp[tmpi], &src[srci]);
+	      JCL_free (env, src);
+	      src = tmp;
+	      srcl = tmpl;
+	      srci = 0;
+
+	      /* Either replace or append dst depending on whether the
+	       * link is relative or absolute.
+	       */
+	      dsti = src[0] == '/' ? 1 : dsti_save;
+	    }
+	}
+      else
+	{
+	  /* Something doesn't exist, or we don't have permission to
+	   * read it, or a previous path component is a directory, or
+	   * a symlink is looped.  Whatever, we can't check the
+	   * filesystem any more.
+	   */
+	  fschecks = JNI_FALSE;
+	}
+#endif /* HAVE_LSTAT && HAVE_READLINK */
+    }
+  dst[dsti] = '\0';
+
+  jpath = (*env)->NewStringUTF (env, dst);
+  JCL_free (env, src);
+  JCL_free (env, dst);
+  return jpath;
+#else /* not WITHOUT_FILESYSTEM */
+  return NULL;
+#endif /* not WITHOUT_FILESYSTEM */
+}
Index: include/java_io_VMFile.h
===================================================================
RCS file: /cvsroot/classpath/classpath/include/java_io_VMFile.h,v
retrieving revision 1.3
diff -u -r1.3 java_io_VMFile.h
--- include/java_io_VMFile.h	11 Nov 2004 17:31:31 -0000	1.3
+++ include/java_io_VMFile.h	7 Jun 2006 15:08:28 -0000
@@ -24,6 +24,7 @@
 JNIEXPORT jboolean JNICALL Java_java_io_VMFile_canWrite (JNIEnv *env, jclass, jstring);
 JNIEXPORT jboolean JNICALL Java_java_io_VMFile_canRead (JNIEnv *env, jclass, jstring);
 JNIEXPORT jboolean JNICALL Java_java_io_VMFile_isDirectory (JNIEnv *env, jclass, jstring);
+JNIEXPORT jstring JNICALL Java_java_io_VMFile_toCanonicalForm (JNIEnv *env,jclass, jstring);
 #undef java_io_VMFile_IS_CASE_SENSITIVE
 #define java_io_VMFile_IS_CASE_SENSITIVE 1L
 #undef java_io_VMFile_IS_DOS_8_3
Index: vm/reference/java/io/VMFile.java
===================================================================
RCS file: /cvsroot/classpath/classpath/vm/reference/java/io/VMFile.java,v
retrieving revision 1.7
diff -u -r1.7 VMFile.java
--- vm/reference/java/io/VMFile.java	2 Jul 2005 20:33:08 -0000	1.7
+++ vm/reference/java/io/VMFile.java	7 Jun 2006 15:08:28 -0000
@@ -1,5 +1,5 @@
 /* VMFile.java -- Class for methods natively accessing files
-   Copyright (C) 2004  Free Software Foundation, Inc.
+   Copyright (C) 2004, 2006  Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -210,10 +210,10 @@
 
   /**
    * This method returns a canonical representation of the pathname of
-   * the given path.  The actual form of the canonical representation is
-   * different.  On the GNU system, the canonical form differs from the
-   * absolute form in that all relative file references to "." and ".."
-   * are resolved and removed.
+   * this file.  The actual form of the canonical representation is
+   * system-dependent.  On the GNU system, conversion to canonical
+   * form involves the removal of redundant separators, references to
+   * "." and "..", and symbolic links.
    * <p>
    * Note that this method, unlike the other methods which return path
    * names, can throw an IOException.  This is because native method 
@@ -221,9 +221,5 @@
    *
    * @exception IOException If an error occurs
    */
-  public static String toCanonicalForm(String path) throws IOException
-  {
-	// FIXME: this only works on UNIX
-	return PlatformHelper.toCanonicalForm(path);
-  }
+  public static native String toCanonicalForm(String path) throws IOException;
 }
Index: gnu/java/io/PlatformHelper.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/java/io/PlatformHelper.java,v
retrieving revision 1.6
diff -u -r1.6 PlatformHelper.java
--- gnu/java/io/PlatformHelper.java	14 Nov 2005 13:08:11 -0000	1.6
+++ gnu/java/io/PlatformHelper.java	7 Jun 2006 15:08:28 -0000
@@ -1,5 +1,5 @@
 /* PlatformHelper.java -- Isolate OS-specific IO helper methods and variables
-   Copyright (C) 1998, 2002 Free Software Foundation, Inc.
+   Copyright (C) 1998, 2002, 2006 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -97,98 +97,6 @@
   }
 
   /**
-   * This routine canonicalizes input param "path" to formal path representation
-   *  for current platform, including interpreting ".." and "." .
-   */
-  public static final String toCanonicalForm(String path)
-  {
-    /*??
-    if(path.indexOf('.') < 0 && path.indexOf("..") < 0)
-        return path; 
-    */
-    String tmppath = path.replace('/', separatorChar);
-    StringBuffer canonpath;
-
-    int i;
-
-    if ((i = beginWithRootPathPrefix(tmppath)) == 0 )
-      return path;
-    
-    /* The original 
-           "canonpath = new StringBuffer(tmppath.substring(0, i))"
-       isn't very efficient because StringBuffer's 
-       ensureCapacity_unsynchronized will fail definitely each time 
-       and will enlarge buffer and copy contents.       .
-    */
-    canonpath = new StringBuffer(INITIAL_MAX_PATH);
-    canonpath.append(tmppath.substring(0, i));
-    tmppath = tmppath.substring(i);
-    // pathdepth==0 indicates there're only root path in the buffer
-    int pathdepth = 0;
-    
-    StringTokenizer st = new StringTokenizer(tmppath, separator);
-    
-    // Traverse each element of the path, handling "." and ".."
-    // Should handle "~" too?
-    if (st.hasMoreTokens())
-      do
-        {
-          String s = st.nextToken();
-        
-          // Handle "." or an empty element.  
-          if (s.equals(".") || s.equals(""))
-            continue;
-        
-          // Handle ".." by deleting the last element from the path
-          if (s.equals(".."))
-            {
-              if (pathdepth == 0)
-                continue;
-
-              // Strip of trailing separator
-              canonpath.setLength(canonpath.length() - 1/*separator.length()*/);
-              String tmpstr = canonpath.toString();
-              int idx = tmpstr.lastIndexOf(separator); 
-
-              if ((idx == -1) || ((idx + 1/*separator.length()*/) > tmpstr.length()))
-                //throw new IOException("Can't happen error"); 
-                return path; // Shouldn't happen 
-        
-              canonpath.setLength(idx + 1/*separator.length()*/);
-              pathdepth--;
-              continue;
-            }
-        
-          canonpath.append(s);
-          pathdepth++; //now it's more than root path
-
-          if (st.hasMoreTokens())
-            canonpath.append(separator);
-        }
-      while (st.hasMoreTokens());
-    
-    if (endWithSeparator(path))
-      canonpath.append(separator);
-        
-    String tmpstr = canonpath.toString();
-    //if (pathdepth > 0 && endWithSeparator(tmpstr) )
-    //    tmpstr = tmpstr.substring(0, tmpstr.length() - 1/*separator.length()*/);
-    
-    return tmpstr;
-  }
-
-  /**
-   * This routine canonicalizes input param "path" to formal path representation
-   *  for current platform, and normalize all separators to "sepchar".
-   */
-  public static final String toCanonicalForm(String path, char sepchar)
-  {
-    String tmpstr = toCanonicalForm(path);
-    tmpstr = tmpstr.replace(separatorChar, sepchar);
-    return tmpstr;
-  }
-
-  /**
    * This routine checks whether input param "path" ends with separator
    */
   public static final boolean endWithSeparator(String path)
Index: java/io/File.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/io/File.java,v
retrieving revision 1.61
diff -u -r1.61 File.java
--- java/io/File.java	17 Dec 2005 21:16:23 -0000	1.61
+++ java/io/File.java	7 Jun 2006 15:08:28 -0000
@@ -1,5 +1,5 @@
 /* File.java -- Class representing a file on disk
-   Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005
+   Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006
    Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
@@ -484,9 +484,9 @@
   /**
    * This method returns a canonical representation of the pathname of
    * this file.  The actual form of the canonical representation is
-   * different.  On the GNU system, the canonical form differs from the
-   * absolute form in that all relative file references to "." and ".."
-   * are resolved and removed.
+   * system-dependent.  On the GNU system, conversion to canonical
+   * form involves the removal of redundant separators, references to
+   * "." and "..", and symbolic links.
    * <p>
    * Note that this method, unlike the other methods which return path
    * names, can throw an IOException.  This is because native method 


More information about the Classpath-patches mailing list