diff options
| author | Joey Hess <joey@kitenet.net> | 2014-05-14 19:41:05 -0400 |
|---|---|---|
| committer | Joey Hess <joey@kitenet.net> | 2014-05-14 19:41:05 -0400 |
| commit | 7115d1ec162b4059b3e8e8f84bd8d5898c1db025 (patch) | |
| tree | 42c1cce54e890e1d56484794ab33129132d8fee2 /Utility/FileSystemEncoding.hs | |
| parent | ffe371a9d42cded461236e972a24a142419d7fc4 (diff) | |
moved source code to src
This is to work around OSX's brain-damange regarding filename case
insensitivity.
Avoided moving config.hs, because it's a config file. Put in a symlink to
make build work.
Diffstat (limited to 'Utility/FileSystemEncoding.hs')
| -rw-r--r-- | Utility/FileSystemEncoding.hs | 132 |
1 files changed, 0 insertions, 132 deletions
diff --git a/Utility/FileSystemEncoding.hs b/Utility/FileSystemEncoding.hs deleted file mode 100644 index b81fdc53..00000000 --- a/Utility/FileSystemEncoding.hs +++ /dev/null @@ -1,132 +0,0 @@ -{- GHC File system encoding handling. - - - - Copyright 2012-2014 Joey Hess <joey@kitenet.net> - - - - License: BSD-2-clause - -} - -{-# LANGUAGE CPP #-} - -module Utility.FileSystemEncoding ( - fileEncoding, - withFilePath, - md5FilePath, - decodeBS, - decodeW8, - encodeW8, - truncateFilePath, -) where - -import qualified GHC.Foreign as GHC -import qualified GHC.IO.Encoding as Encoding -import Foreign.C -import System.IO -import System.IO.Unsafe -import qualified Data.Hash.MD5 as MD5 -import Data.Word -import Data.Bits.Utils -import qualified Data.ByteString.Lazy as L -#ifdef mingw32_HOST_OS -import qualified Data.ByteString.Lazy.UTF8 as L8 -#endif - -{- Sets a Handle to use the filesystem encoding. This causes data - - written or read from it to be encoded/decoded the same - - as ghc 7.4 does to filenames etc. This special encoding - - allows "arbitrary undecodable bytes to be round-tripped through it". - -} -fileEncoding :: Handle -> IO () -#ifndef mingw32_HOST_OS -fileEncoding h = hSetEncoding h =<< Encoding.getFileSystemEncoding -#else -{- The file system encoding does not work well on Windows, - - and Windows only has utf FilePaths anyway. -} -fileEncoding h = hSetEncoding h Encoding.utf8 -#endif - -{- Marshal a Haskell FilePath into a NUL terminated C string using temporary - - storage. The FilePath is encoded using the filesystem encoding, - - reversing the decoding that should have been done when the FilePath - - was obtained. -} -withFilePath :: FilePath -> (CString -> IO a) -> IO a -withFilePath fp f = Encoding.getFileSystemEncoding - >>= \enc -> GHC.withCString enc fp f - -{- Encodes a FilePath into a String, applying the filesystem encoding. - - - - There are very few things it makes sense to do with such an encoded - - string. It's not a legal filename; it should not be displayed. - - So this function is not exported, but instead used by the few functions - - that can usefully consume it. - - - - This use of unsafePerformIO is belived to be safe; GHC's interface - - only allows doing this conversion with CStrings, and the CString buffer - - is allocated, used, and deallocated within the call, with no side - - effects. - -} -{-# NOINLINE _encodeFilePath #-} -_encodeFilePath :: FilePath -> String -_encodeFilePath fp = unsafePerformIO $ do - enc <- Encoding.getFileSystemEncoding - GHC.withCString enc fp $ GHC.peekCString Encoding.char8 - -{- Encodes a FilePath into a Md5.Str, applying the filesystem encoding. -} -md5FilePath :: FilePath -> MD5.Str -md5FilePath = MD5.Str . _encodeFilePath - -{- Decodes a ByteString into a FilePath, applying the filesystem encoding. -} -decodeBS :: L.ByteString -> FilePath -#ifndef mingw32_HOST_OS -decodeBS = encodeW8 . L.unpack -#else -{- On Windows, we assume that the ByteString is utf-8, since Windows - - only uses unicode for filenames. -} -decodeBS = L8.toString -#endif - -{- Converts a [Word8] to a FilePath, encoding using the filesystem encoding. - - - - w82c produces a String, which may contain Chars that are invalid - - unicode. From there, this is really a simple matter of applying the - - file system encoding, only complicated by GHC's interface to doing so. - -} -{-# NOINLINE encodeW8 #-} -encodeW8 :: [Word8] -> FilePath -encodeW8 w8 = unsafePerformIO $ do - enc <- Encoding.getFileSystemEncoding - GHC.withCString Encoding.char8 (w82s w8) $ GHC.peekCString enc - -{- Useful when you want the actual number of bytes that will be used to - - represent the FilePath on disk. -} -decodeW8 :: FilePath -> [Word8] -decodeW8 = s2w8 . _encodeFilePath - -{- Truncates a FilePath to the given number of bytes (or less), - - as represented on disk. - - - - Avoids returning an invalid part of a unicode byte sequence, at the - - cost of efficiency when running on a large FilePath. - -} -truncateFilePath :: Int -> FilePath -> FilePath -#ifndef mingw32_HOST_OS -truncateFilePath n = go . reverse - where - go f = - let bytes = decodeW8 f - in if length bytes <= n - then reverse f - else go (drop 1 f) -#else -{- On Windows, count the number of bytes used by each utf8 character. -} -truncateFilePath n = reverse . go [] n . L8.fromString - where - go coll cnt bs - | cnt <= 0 = coll - | otherwise = case L8.decode bs of - Just (c, x) | c /= L8.replacement_char -> - let x' = fromIntegral x - in if cnt - x' < 0 - then coll - else go (c:coll) (cnt - x') (L8.drop 1 bs) - _ -> coll -#endif |
