-
Notifications
You must be signed in to change notification settings - Fork 0
/
MLData.hs
39 lines (31 loc) · 1.26 KB
/
MLData.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
module MLData where
import qualified Data.ByteString.Lazy.Char8 as C
import qualified Data.Map as Map
-- | Sparse feature representation: maps a feature index to its value.
-- Indices that are absent from the map are treated as zero by
-- 'exampleToVector'.
type FeatureList = Map.Map Int Float

-- | A collection of examples, kept as a plain list.
type ExampleSet = [Example]

-- | A single labelled example.
data Example = Example
  { lab :: Int
    -- ^ Integer label of the example (the first token of a parsed line,
    -- see 'parseExample').
  , features :: FeatureList
    -- ^ Sparse feature values keyed by feature index.
  }
  deriving (Show)
-- | Build an 'Example' from the whitespace-separated tokens of one input line.
--
-- The first token is the integer label. Every remaining token must look like
-- @index:value@ (the sparse layout used by libsvm-style data files), where
-- @index@ is an 'Int' and @value@ a 'Float'. Anything after a second @:@ in a
-- token is ignored. If the same index occurs twice, the later value wins
-- (that is how 'Map.fromList' resolves duplicate keys).
--
-- Numbers may carry an explicit leading @+@ (for example a label of @+1@),
-- which plain 'read' would reject.
--
-- The result is built lazily: malformed input raises an 'error' with a
-- descriptive message only when the offending part of the 'Example' is
-- actually evaluated. An empty token list is rejected immediately.
parseExample :: [C.ByteString] -> Example
parseExample [] = error "parseExample: empty token list (blank line?)"
parseExample (labelTok : featureToks) =
  Example
    (parseNum "label" (C.unpack labelTok))
    (Map.fromList (map parseFeature featureToks))
  where
    -- Split one "index:value" token into its numeric pair.
    parseFeature :: C.ByteString -> (Int, Float)
    parseFeature tok =
      case map C.unpack (C.split ':' tok) of
        (idx : val : _) ->
          (parseNum "feature index" idx, parseNum "feature value" val)
        _ ->
          error ("parseExample: malformed feature " ++ show (C.unpack tok))

    -- Same acceptance rule as Prelude's 'read' (surrounding whitespace is
    -- allowed, nothing else may follow), but with a useful error message.
    parseNum :: Read a => String -> String -> a
    parseNum what str =
      case [x | (x, rest) <- reads (dropPlus str), ("", "") <- lex rest] of
        [x] -> x
        _ -> error ("parseExample: cannot parse " ++ what ++ " " ++ show str)

    -- 'reads' does not understand a unary plus, so strip it when it is
    -- directly followed by a digit; every other string is left untouched.
    dropPlus :: String -> String
    dropPlus ('+' : rest@(c : _))
      | c >= '0' && c <= '9' = rest
    dropPlus str = str
-- | Parse the full contents of a data file into an 'ExampleSet'.
--
-- The input is cut into lines, each line into whitespace-separated tokens,
-- and every tokenised line is handed to 'parseExample'. The order of the
-- examples follows the order of the lines.
--
-- Lines without any token (empty or whitespace-only) are skipped:
-- 'parseExample' needs at least a label token, so such a line would
-- otherwise abort the whole load.
loadExampleSet :: C.ByteString -> ExampleSet
loadExampleSet input =
  map parseExample . filter (not . null) . map C.words $ C.lines input
-- | Read a comma-separated list of numbers into a list of 'Float's.
--
-- The input is split on @,@ and each piece is converted with 'read', so a
-- piece that is not a valid 'Float' literal raises a runtime error when the
-- corresponding element is evaluated.
loadWeights :: C.ByteString -> [Float]
loadWeights = map (read . C.unpack) . C.split ','
-- | Expand the sparse features of an example into a dense list.
--
-- The result has one entry for each index from 1 up to @n_features@
-- (so it is empty when @n_features@ is below 1). Entry @k@ is the value
-- stored under feature index @k@, or 0 when the example has no such feature.
-- The label of the example is not used.
exampleToVector :: Int -> Example -> [Float]
exampleToVector n_features (Example _ fl) = map valueAt [1 .. n_features]
  where
    -- Missing indices default to zero.
    valueAt idx = Map.findWithDefault 0 idx fl
-- | Largest feature index that occurs anywhere in the example set.
--
-- Returns 0 for an empty set. An example whose feature map is empty
-- contributes 0 instead of making 'maximum' fail on an empty key list, so a
-- label-only example no longer crashes the computation.
findNFeatures :: ExampleSet -> Int
findNFeatures [] = 0
findNFeatures exs = maximum (map maxFeature exs)
  where
    -- Highest key of one example's feature map; 'Map.findMax' is only called
    -- on non-empty maps, where it is total.
    maxFeature (Example _ fs)
      | Map.null fs = 0
      | otherwise = fst (Map.findMax fs)