Skip to content

Commit

Permalink
Add Windows implementation of ReadDir module
Browse files Browse the repository at this point in the history
  • Loading branch information
harendra-kumar committed Jul 8, 2024
1 parent 4dbf93a commit e3c03ea
Show file tree
Hide file tree
Showing 4 changed files with 247 additions and 53 deletions.
53 changes: 15 additions & 38 deletions core/src/Streamly/Internal/FileSystem/Dir.hs
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ module Streamly.Internal.FileSystem.Dir
, readEither
, readEitherPaths
, readEitherChunks
, _readEitherChunks
, readEitherByteChunks

-- We can implement this in terms of readAttrsRecursive without losing
-- perf.
Expand Down Expand Up @@ -94,13 +92,14 @@ import Streamly.Internal.Data.Unfold (Step(..))
import Streamly.Internal.Data.Unfold.Type (Unfold(..))
import Streamly.Internal.FileSystem.Path (Path)
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
import qualified System.Win32 as Win32
import qualified Streamly.Internal.Data.Fold as Fold
import Streamly.Internal.FileSystem.Windows.ReadDir
(DirStream, openDirStream, closeDirStream, readDirStreamEither)
#else
import Streamly.Internal.FileSystem.Posix.ReadDir
( DirStream, openDirStream, closeDirStream, readDirStreamEither
, readEitherChunks, readEitherByteChunks)
, readEitherChunks)
#endif
import qualified Streamly.Internal.Data.Fold as Fold
import qualified Streamly.Internal.Data.Stream as S
import qualified Streamly.Data.Unfold as UF
import qualified Streamly.Internal.Data.Unfold as UF (mapM2, bracketIO)
Expand Down Expand Up @@ -242,8 +241,6 @@ toStreamWithBufferOf chunkSize h = AS.concat $ toChunksWithBufferOf chunkSize h

-- XXX exception handling

#if !defined(mingw32_HOST_OS) && !defined(__MINGW32__)

{-# INLINE streamEitherReader #-}
streamEitherReader :: MonadIO m =>
Unfold m DirStream (Either Path Path)
Expand All @@ -260,30 +257,6 @@ streamEitherReader = Unfold step return
streamReader :: MonadIO m => Unfold m DirStream Path
streamReader = fmap (either id id) streamEitherReader

#else

openDirStream :: String -> IO (Win32.HANDLE, Win32.FindData)
openDirStream = Win32.findFirstFile

closeDirStream :: (Win32.HANDLE, Win32.FindData) -> IO ()
closeDirStream (h, _) = Win32.findClose h

{-# INLINE streamReader #-}
streamReader :: MonadIO m => Unfold m (Win32.HANDLE, Win32.FindData) Path
streamReader = Unfold step return

where

step (h, fdat) = do
more <- liftIO $ Win32.findNextFile h fdat
if more
then do
filepath <- liftIO $ Win32.getFindDataFileName fdat
filename <- Path.fromString filepath
return $ Yield filename (h, fdat)
else return Stop
#endif

-- | Read a directory emitting a stream with names of the children. Filter out
-- "." and ".." entries.
--
Expand Down Expand Up @@ -311,15 +284,11 @@ reader =
{-# INLINE eitherReader #-}
eitherReader :: (MonadIO m, MonadCatch m) =>
Unfold m Path (Either Path Path)
#if !defined(mingw32_HOST_OS) && !defined(__MINGW32__)
eitherReader =
-- XXX The measured overhead of bracketIO is not noticeable, if it turns
-- out to be a problem for small filenames we can use getdents64 to use
-- chunked read to avoid the overhead.
UF.bracketIO openDirStream closeDirStream streamEitherReader
#else
eitherReader = undefined
#endif

{-# INLINE eitherReaderPaths #-}
eitherReaderPaths ::(MonadIO m, MonadCatch m) =>
Expand Down Expand Up @@ -374,13 +343,20 @@ readEitherPaths dir =
let (</>) = Path.append
in fmap (bimap (dir </>) (dir </>)) $ readEither dir

#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
-- XXX Implement a custom version of readEitherChunks (like for Posix) for
-- windows as well. Also implement readEitherByteChunks.
--
-- XXX For a fast custom implementation of traversal, the Right could be the
-- final array chunk including all files and dirs to be written to IO. The Left
-- could be list of dirs to be traversed.
{-# INLINE _readEitherChunks #-}
_readEitherChunks :: (MonadIO m, MonadCatch m) =>
--
-- This is a generic (but slower?) version of readEitherChunks using
-- eitherReaderPaths.
{-# INLINE readEitherChunks #-}
readEitherChunks :: (MonadIO m, MonadCatch m) =>
[Path] -> Stream m (Either [Path] [Path])
_readEitherChunks dirs =
readEitherChunks dirs =
-- XXX Need to use a take to limit the group size. There will be separate
-- limits for dir and files groups.
S.groupsWhile grouper collector
Expand All @@ -405,6 +381,7 @@ _readEitherChunks dirs =
Right _ -> Left [x1] -- initial
_ -> either (\xs -> Left (x1:xs)) Right b
Right x1 -> fmap (x1:) b
#endif

{-# DEPRECATED toEither "Please use 'readEither' instead" #-}
{-# INLINE toEither #-}
Expand Down
31 changes: 16 additions & 15 deletions core/src/Streamly/Internal/FileSystem/Posix/ReadDir.hsc
Original file line number Diff line number Diff line change
Expand Up @@ -75,16 +75,10 @@ throwErrnoPathIfNullRetry loc path f =
-- requires unix >= 2.8
-------------------------------------------------------------------------------

newtype DirStream = DirStream (Ptr CDir)

data {-# CTYPE "DIR" #-} CDir
data {-# CTYPE "struct dirent" #-} CDirent

-- | @closeDirStream dp@ calls @closedir@ to close
-- the directory stream @dp@.
closeDirStream :: DirStream -> IO ()
closeDirStream (DirStream dirp) = do
throwErrnoIfMinus1Retry_ "closeDirStream" (c_closedir dirp)
newtype DirStream = DirStream (Ptr CDir)

-------------------------------------------------------------------------------

Expand All @@ -94,12 +88,14 @@ foreign import ccall unsafe "closedir"
foreign import capi unsafe "dirent.h opendir"
c_opendir :: CString -> IO (Ptr CDir)

-- XXX The "unix" package uses a wrapper over readdir __hscore_readdir (see
-- cbits/HsUnix.c in unix package) which uses readdir_r in some cases where
-- readdir is not known to be re-entrant. We are not doing that here. We are
-- assuming that readdir is re-entrant which may not be the case on some old
-- unix systems.
foreign import ccall unsafe "dirent.h readdir"
c_readdir :: Ptr CDir -> IO (Ptr CDirent)

foreign import ccall unsafe "__hscore_d_name"
d_name :: Ptr CDirent -> IO CString

-- XXX Use openat instead of open so that we do not have to build and resolve
-- absolute paths.
--
Expand All @@ -109,10 +105,15 @@ foreign import ccall unsafe "__hscore_d_name"
openDirStream :: PosixPath -> IO DirStream
openDirStream p =
Array.asCStringUnsafe (Path.toChunk p) $ \s -> do
-- XXX is toString always creating another copy or only in case of error?
dirp <- throwErrnoPathIfNullRetry "openDirStream" p $ c_opendir s
return (DirStream dirp)

-- | @closeDirStream dp@ calls @closedir@ to close
-- the directory stream @dp@.
closeDirStream :: DirStream -> IO ()
closeDirStream (DirStream dirp) = do
throwErrnoIfMinus1Retry_ "closeDirStream" (c_closedir dirp)

isMetaDir :: Ptr CChar -> IO Bool
isMetaDir dname = do
-- XXX Assuming an encoding that maps "." to ".", this is true for
Expand Down Expand Up @@ -155,7 +156,7 @@ readDirStreamEither (DirStream dirp) = loop
ptr <- c_readdir dirp
if (ptr /= nullPtr)
then do
dname <- d_name ptr
let dname = #{ptr struct dirent, d_name} ptr
dtype :: #{type unsigned char} <- #{peek struct dirent, d_type} ptr
-- dreclen :: #{type unsigned short} <- #{peek struct dirent, d_reclen} ptr
-- It is possible to find the name length using dreclen and then use
Expand Down Expand Up @@ -228,7 +229,7 @@ readEitherChunks alldirs =
dentPtr <- liftIO $ c_readdir dirp
if (dentPtr /= nullPtr)
then do
dname <- liftIO $ d_name dentPtr
let dname = #{ptr struct dirent, d_name} dentPtr
dtype :: #{type unsigned char} <-
liftIO $ #{peek struct dirent, d_type} dentPtr

Expand Down Expand Up @@ -391,7 +392,7 @@ readEitherByteChunks alldirs =
dentPtr <- liftIO $ c_readdir dirp
if (dentPtr /= nullPtr)
then do
dname <- liftIO $ d_name dentPtr
let dname = #{ptr struct dirent, d_name} dentPtr
dtype :: #{type unsigned char} <-
liftIO $ #{peek struct dirent, d_type} dentPtr

Expand Down Expand Up @@ -440,5 +441,5 @@ readEitherByteChunks alldirs =
liftIO $ closeDirStream (DirStream dirp)
if (n == 0)
then return $ Skip (ChunkStreamByteInit xs dirs ndirs mbarr pos)
else liftIO $ throwErrno "readEitherChunks"
else liftIO $ throwErrno "readEitherByteChunks"
#endif
Loading

0 comments on commit e3c03ea

Please sign in to comment.