N-граммы в действии

This commit is contained in:
2024-12-09 16:48:54 +03:00
parent 5700d5892a
commit 93bb0f247c
4 changed files with 28 additions and 2 deletions

View File

@@ -8,4 +8,7 @@ main =
putStrLn "Введите имя файла:" >>
getLine >>= \fileName ->
readFile fileName >>= \content ->
uprint $ splitText content
let sentences = splitText content in
uprint (take 10 sentences) >>
let dict = buildDictionary sentences in
saveDictionary "dict.txt" dict

View File

@@ -21,6 +21,7 @@ description: Please see the README on GitHub at <https://github.com/gith
dependencies:
- base >= 4.7 && < 5
- containers
- unescaping-print
ghc-options:

View File

@@ -35,6 +35,7 @@ library
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints
build-depends:
base >=4.7 && <5
, containers
, unescaping-print
default-language: Haskell2010
@@ -49,6 +50,7 @@ executable part2-exe
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N
build-depends:
base >=4.7 && <5
, containers
, part2
, unescaping-print
default-language: Haskell2010
@@ -65,6 +67,7 @@ test-suite part2-test
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N
build-depends:
base >=4.7 && <5
, containers
, part2
, unescaping-print
default-language: Haskell2010

View File

@@ -1,6 +1,11 @@
module Lib where
import Data.Char (isLetter, toLower)
import Data.Map (Map)
import qualified Data.Map as Map
import Data.List (nub, tails)
import System.IO
import UnescapingPrint (ushow)
splitText :: String -> [[String]]
@@ -22,4 +27,18 @@ splitText text = filter (not . null) $ map (processSentence . words) (splitSente
processSentence = filter (not . null) . map cleanWord
cleanWord :: String -> String
cleanWord = map toLower . filter isLetter
cleanWord = map toLower . filter isLetter
buildDictionary :: [[String]] -> Map String [String]
buildDictionary sentences =
let bigrams = [ (w1, w2) | s <- sentences, (w1:w2:_) <- tails s ]
trigrams = [ (w1, w2, w3) | s <- sentences, (w1:w2:w3:_) <- tails s ]
singleKeys = foldr (\(w1, w2) acc -> Map.insertWith (++) w1 [w2] acc) Map.empty bigrams
singleKeys' = foldr (\(w1, w2, w3) acc -> Map.insertWith (++) w1 [w2 ++ " " ++ w3] acc) singleKeys trigrams
doubleKeys = foldr (\(w1, w2, w3) acc -> Map.insertWith (++) (w1 ++ " " ++ w2) [w3] acc) Map.empty trigrams
combined = Map.unionWith (++) singleKeys' doubleKeys
in Map.map nub combined
saveDictionary :: FilePath -> Map String [String] -> IO ()
saveDictionary filePath dict = withFile filePath WriteMode $ \h ->
mapM_ (\(k,v) -> hPutStrLn h $ ushow k ++ ": " ++ ushow v) (Map.toList dict)