diff --git a/.travis.yml b/.travis.yml index 47def082..371df0d9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -31,6 +31,7 @@ deploy: file: - count - plagiarism + - diagnostics skip_cleanup: true on: tags: true diff --git a/app/count/Main.hs b/app/count/Main.hs index ad787aef..5ca53503 100644 --- a/app/count/Main.hs +++ b/app/count/Main.hs @@ -1,6 +1,5 @@ -import Lichen.Config.Languages -import Lichen.Config.Count +import Lichen.Count.Config import Lichen.Count.Main main :: IO () -main = realMain $ defaultConfig { language = langC } +main = realMain defaultConfig diff --git a/app/diagnostics/Main.hs b/app/diagnostics/Main.hs index aad69a6e..6ea17fb8 100644 --- a/app/diagnostics/Main.hs +++ b/app/diagnostics/Main.hs @@ -1,4 +1,4 @@ -import Lichen.Config.Diagnostics +import Lichen.Diagnostics.Config import Lichen.Diagnostics.Main main :: IO () diff --git a/app/plagiarism/Main.hs b/app/plagiarism/Main.hs index 06334b47..047e1dc2 100644 --- a/app/plagiarism/Main.hs +++ b/app/plagiarism/Main.hs @@ -1,6 +1,5 @@ -import Lichen.Config.Languages -import Lichen.Config.Plagiarism +import Lichen.Plagiarism.Config import Lichen.Plagiarism.Main main :: IO () -main = realMain $ defaultConfig { language = langC } +main = realMain defaultConfig diff --git a/build.sh b/build.sh index f3a7ec91..5df89ea2 100755 --- a/build.sh +++ b/build.sh @@ -1,7 +1,7 @@ #!/bin/sh VERSION=$1 -BINARIES="count plagiarism" +BINARIES="count plagiarism diagnostics" if [ -z $VERSION ] then VERSION="latest" diff --git a/lichen.cabal b/lichen.cabal index 1a38c02c..4a0f07af 100644 --- a/lichen.cabal +++ b/lichen.cabal @@ -1,5 +1,5 @@ name: lichen -version: 0.2.7 +version: 0.3.0 synopsis: Plagiarism Detection and Other Static Analysis description: Please see README.md homepage: https://github.com/Submitty/AnalysisTools @@ -14,17 +14,19 @@ library hs-source-dirs: src exposed-modules: Lichen.Util , Lichen.Error + , Lichen.Languages , Lichen.Config - , Lichen.Config.Languages - , Lichen.Config.Plagiarism - , Lichen.Config.Count - , Lichen.Config.Diagnostics + , Lichen.Lexer , Lichen.Lexer.C , Lichen.Lexer.Python + , Lichen.Lexer.Java + , Lichen.Parser , Lichen.Parser.Python + , Lichen.Plagiarism.Main + , Lichen.Plagiarism.Config , Lichen.Plagiarism.Winnow , Lichen.Plagiarism.Compare , Lichen.Plagiarism.Walk @@ -36,9 +38,13 @@ library , Lichen.Plagiarism.Render , Lichen.Plagiarism.Render.Index , Lichen.Plagiarism.Render.Compare + , Lichen.Count.Main + , Lichen.Count.Config , Lichen.Count.Counters + , Lichen.Diagnostics.Main + , Lichen.Diagnostics.Config build-depends: base >= 4.7 && < 5 , containers , split @@ -60,7 +66,7 @@ library , jmacro , QuickCheck , hspec - ghc-options: -O2 -Wall -fwarn-incomplete-patterns -fno-warn-orphans + ghc-options: -O2 -Wall default-language: Haskell2010 executable plagiarism diff --git a/src/Lichen/Config/Count.hs b/src/Lichen/Count/Config.hs similarity index 94% rename from src/Lichen/Config/Count.hs rename to src/Lichen/Count/Config.hs index 1e17c2b6..bece4324 100644 --- a/src/Lichen/Config/Count.hs +++ b/src/Lichen/Count/Config.hs @@ -1,12 +1,12 @@ {-# LANGUAGE OverloadedStrings, RecordWildCards #-} -module Lichen.Config.Count where +module Lichen.Count.Config where import Data.Maybe import Data.Aeson import Lichen.Config -import Lichen.Config.Languages +import Lichen.Languages import Lichen.Count.Counters data Config = Config diff --git a/src/Lichen/Count/Counters.hs b/src/Lichen/Count/Counters.hs index e8ad7aca..b00be42a 100644 --- a/src/Lichen/Count/Counters.hs +++ b/src/Lichen/Count/Counters.hs @@ -10,7 +10,7 @@ import qualified Data.ByteString as BS import Control.Monad.Except import Lichen.Error -import Lichen.Config.Languages +import Lichen.Languages import Lichen.Lexer import Lichen.Util import qualified Lichen.Parser as P @@ -24,7 +24,7 @@ counterDummy :: Counter counterDummy = Counter $ \_ _ _ -> throwError $ InvocationError "Invalid counting method specified" counterToken :: Counter -counterToken = Counter $ \(Language _ l _ readTok _) t p -> do +counterToken = Counter $ \(Language _ _ readTok l _) t p -> do ssrc <- liftIO $ readSafe (liftIO . BS.readFile) (throwError $ InvocationError "File not found") p src <- ssrc tokens <- l p src diff --git a/src/Lichen/Count/Main.hs b/src/Lichen/Count/Main.hs index b92d8d92..2b2d7c21 100644 --- a/src/Lichen/Count/Main.hs +++ b/src/Lichen/Count/Main.hs @@ -18,8 +18,8 @@ import Options.Applicative import Lichen.Util import Lichen.Error import Lichen.Config -import Lichen.Config.Languages -import Lichen.Config.Count +import Lichen.Languages +import Lichen.Count.Config import Lichen.Count.Counters parseOptions :: Config -> Parser Config diff --git a/src/Lichen/Config/Diagnostics.hs b/src/Lichen/Diagnostics/Config.hs similarity index 94% rename from src/Lichen/Config/Diagnostics.hs rename to src/Lichen/Diagnostics/Config.hs index 786f961c..c5e17f0c 100644 --- a/src/Lichen/Config/Diagnostics.hs +++ b/src/Lichen/Diagnostics/Config.hs @@ -1,12 +1,12 @@ {-# LANGUAGE OverloadedStrings, RecordWildCards #-} -module Lichen.Config.Diagnostics where +module Lichen.Diagnostics.Config where import Data.Maybe import Data.Aeson import Lichen.Config -import Lichen.Config.Languages +import Lichen.Languages data Config = Config { dataDir :: FilePath diff --git a/src/Lichen/Diagnostics/Main.hs b/src/Lichen/Diagnostics/Main.hs index 334d02c8..6aa69bec 100644 --- a/src/Lichen/Diagnostics/Main.hs +++ b/src/Lichen/Diagnostics/Main.hs @@ -23,14 +23,14 @@ import Options.Applicative import Lichen.Util import Lichen.Error import Lichen.Config -import Lichen.Config.Languages -import Lichen.Config.Diagnostics +import Lichen.Languages +import Lichen.Diagnostics.Config import Lichen.Plagiarism.Concatenate import Lichen.Plagiarism.Compare import Lichen.Plagiarism.Walk import Lichen.Plagiarism.Winnow import Lichen.Plagiarism.AssignmentSettings -import qualified Lichen.Config.Plagiarism as Plagiarism +import qualified Lichen.Plagiarism.Config as Plagiarism compareTag :: String -> [(Fingerprints, String)] -> [(Double, (Fingerprints, String), (Fingerprints, String))] compareTag t prints = case ours of Just x -> curry compareFingerprints x <$> prints @@ -40,7 +40,7 @@ compareTag t prints = case ours of Just x -> curry compareFingerprints x <$> pri ours = tagged prints diagnosticsToken :: Language -> FilePath -> Erring Pair -diagnosticsToken (Language _ l _ _ _) p = do +diagnosticsToken (Language _ _ _ l _) p = do src <- liftIO $ BS.readFile p tokens <- l p src return $ T.pack p .= toJSON tokens diff --git a/src/Lichen/Error.hs b/src/Lichen/Error.hs index 51b7c7c7..6453e1fb 100644 --- a/src/Lichen/Error.hs +++ b/src/Lichen/Error.hs @@ -14,6 +14,8 @@ import Text.Megaparsec.Error (ParseError, parseErrorPretty) import Control.Monad.Except (ExceptT) +import Lichen.Util + type Erring = ExceptT LichenError IO data LichenError = LexError (ParseError (Token BS.ByteString) Dec) @@ -24,8 +26,8 @@ data LichenError = LexError (ParseError (Token BS.ByteString) Dec) deriving Show printError :: LichenError -> IO () -printError (LexError e) = T.IO.hPutStrLn stderr "Lexer error: " >> hPutStrLn stderr (parseErrorPretty e) -printError (ParseError t) = T.IO.hPutStrLn stderr ("Parser error: " <> t) -printError (InvalidTokenError t) = T.IO.hPutStrLn stderr ("Invalid token error: " <> t) -printError (InvocationError t) = T.IO.hPutStrLn stderr ("Invocation error: " <> t) -printError (JSONDecodingError t) = T.IO.hPutStrLn stderr ("JSON decoding error: " <> t) +printError (LexError e) = err $ T.IO.hPutStrLn stderr "Lexer error: " >> hPutStrLn stderr (parseErrorPretty e) +printError (ParseError t) = err $ T.IO.hPutStrLn stderr ("Parser error: " <> t) +printError (InvalidTokenError t) = err $ T.IO.hPutStrLn stderr ("Invalid token error: " <> t) +printError (InvocationError t) = err $ T.IO.hPutStrLn stderr ("Invocation error: " <> t) +printError (JSONDecodingError t) = err $ T.IO.hPutStrLn stderr ("JSON decoding error: " <> t) diff --git a/src/Lichen/Config/Languages.hs b/src/Lichen/Languages.hs similarity index 73% rename from src/Lichen/Config/Languages.hs rename to src/Lichen/Languages.hs index ba76c398..f0af0ca4 100644 --- a/src/Lichen/Config/Languages.hs +++ b/src/Lichen/Languages.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings, GADTs, DeriveGeneric, StandaloneDeriving #-} -module Lichen.Config.Languages where +module Lichen.Languages where import GHC.Generics @@ -17,6 +17,7 @@ import Lichen.Lexer import Lichen.Parser import qualified Lichen.Lexer.C as C import qualified Lichen.Lexer.Python as Python +import qualified Lichen.Lexer.Java as Java import qualified Lichen.Parser.Python as Python -- Configuration for the winnowing algorithm. Token sequences shorter than @@ -35,30 +36,33 @@ instance FromJSON WinnowConfig -- is an HLint bug, and can safely be ignored. data Language where Language :: (Hashable a, Show a) => { exts :: [FilePath] - , lexer :: Lexer a , winnowConfig :: WinnowConfig , readToken :: String -> Erring a + , lexer :: Lexer a , parser :: Parser Node } -> Language instance FromJSON Language where parseJSON (String s) = pure $ languageChoice langDummy (Just $ T.unpack s) parseJSON _ = pure langDummy -dummy :: a -> b -> Erring c -dummy _ _ = throwError $ InvocationError "Specified analysis method is undefined for language" +dummy :: T.Text -> a -> b -> Erring c +dummy t _ _ = throwError $ InvocationError t smartRead :: Read a => String -> Erring a smartRead s = case readMaybe s of Just t -> pure t Nothing -> throwError . InvalidTokenError $ T.pack s langDummy :: Language -langDummy = Language [] dummy (WinnowConfig 0 0) (const $ pure ()) dummy +langDummy = Language [] (WinnowConfig 0 0) (const $ pure ()) (dummy "No valid language specified") (dummy "No valid language specified") langC :: Language -langC = Language [".c", ".h", ".cpp", ".hpp", ".C", ".H", ".cc"] C.lex (WinnowConfig 16 9) (smartRead :: String -> Erring C.Tok) dummy +langC = Language [".c", ".h", ".cpp", ".hpp", ".C", ".H", ".cc"] (WinnowConfig 16 9) (smartRead :: String -> Erring C.Tok) C.lex (dummy "The C tooling does not currently support the requested feature") langPython :: Language -langPython = Language [".py"] Python.lex (WinnowConfig 16 9) (smartRead :: String -> Erring Python.Tok) Python.parse +langPython = Language [".py"] (WinnowConfig 16 9) (smartRead :: String -> Erring Python.Tok) Python.lex Python.parse + +langJava :: Language +langJava = Language [".java"] (WinnowConfig 16 9) (smartRead :: String -> Erring Java.Tok) Java.lex (dummy "The Java tooling does not currently support the requested feature") languageChoice :: Language -> Maybe String -> Language languageChoice d Nothing = d @@ -67,4 +71,6 @@ languageChoice _ (Just "c") = langC languageChoice _ (Just "Python") = langPython languageChoice _ (Just "python") = langPython languageChoice _ (Just "py") = langPython +languageChoice _ (Just "java") = langJava +languageChoice _ (Just "Java") = langJava languageChoice _ _ = langDummy diff --git a/src/Lichen/Lexer.hs b/src/Lichen/Lexer.hs index 5f698255..cdf14b42 100644 --- a/src/Lichen/Lexer.hs +++ b/src/Lichen/Lexer.hs @@ -40,8 +40,8 @@ wrap p x = do return . Tagged x $ TokPos (sourceLine pos) (sourceLine pos) (sourceColumn pos) (sourceColumn pos <> unsafePos (fromIntegral $ length s)) -- Parse a C-style character literal. Ex: 'a', '@'. -charLit :: Parser Char -charLit = char '\'' *> L.charLiteral <* char '\'' +charLit :: Parser String +charLit = char '\'' *> manyTill L.charLiteral (char '\'') -- Parse a C-style string literal. Ex: "a", "hello, world". strLit :: Parser String diff --git a/src/Lichen/Lexer/C.hs b/src/Lichen/Lexer/C.hs index 4098365c..ab3d019d 100644 --- a/src/Lichen/Lexer/C.hs +++ b/src/Lichen/Lexer/C.hs @@ -5,7 +5,7 @@ module Lichen.Lexer.C where import GHC.Generics (Generic) import Control.Monad ---import Control.Monad.Except +import Control.Monad.Except import Data.Hashable @@ -13,7 +13,7 @@ import Text.Megaparsec import Text.Megaparsec.ByteString import qualified Text.Megaparsec.Lexer as L ---import Lichen.Error +import Lichen.Error import Lichen.Lexer data Tok = Auto | Break | Case | Char | Const | Continue | Default | Do @@ -137,10 +137,9 @@ onetoken = wrap (reserved "auto") Auto <|> wrap (reserved "^") Caret <|> wrap (reserved "|") Pipe <|> wrap (reserved "?") Question - <|> wrap ((:[]) <$> L.charLiteral) Unknown + <|> wrap ((:[]) <$> anyChar) Unknown lex :: Lexer Tok lex p d = case runParser (many (sc *> onetoken <* sc)) p d of - --Left e -> throwError $ LexError e - Left _ -> return [] + Left e -> throwError $ LexError e Right t -> return t diff --git a/src/Lichen/Lexer/Java.hs b/src/Lichen/Lexer/Java.hs new file mode 100644 index 00000000..0bc9540f --- /dev/null +++ b/src/Lichen/Lexer/Java.hs @@ -0,0 +1,146 @@ +{-# LANGUAGE DeriveGeneric #-} + +module Lichen.Lexer.Java where + +import GHC.Generics (Generic) + +import Control.Monad +import Control.Monad.Except + +import Data.Hashable + +import Text.Megaparsec +import Text.Megaparsec.ByteString +import qualified Text.Megaparsec.Lexer as L + +import Lichen.Error +import Lichen.Lexer + +data Tok = Abstract | Assert | Boolean | Break | Byte | Case | Catch | Char + | Class | Const | Continue | Default | Do | Double | Else | Extends + | Finally | Float | For | Goto | If | Implements | Import | InstanceOf + | Int | Interface | Long | Native | New | Package | Private + | Protected | Public | Return | Static | StrictFP | Super + | Synchronized | Switch |This | Throw | Throws | Transient | Try | Void + | Volatile | While | Identifier | IntegerLiteral | FloatLiteral + | StringLiteral | CharLiteral | RightAssign | LeftAssign | AddAssign + | SubAssign | MulAssign | DivAssign | ModAssign | AndAssign | XorAssign + | OrAssign | RightOp | LeftOp | ThreeRight | IncOp | DecOp | AndOp + | OrOp | LeOp | GeOp | EqOp | NeOp | Semicolon | LeftCurly | RightCurly + | Comma | Colon | Equal | LeftParen | RightParen | LeftSquare + | RightSquare | Dot | Ampersand | Exclamation | Tilde | Minus | Plus + | Asterisk | Slash | Percent | LessThan | GreaterThan | Caret | Pipe + | Question | Unknown + deriving (Show, Read, Eq, Generic) +instance Hashable Tok + +sc :: Parser () +sc = L.space (void spaceChar) (L.skipLineComment "//" <|> L.skipLineComment "#") (L.skipBlockComment "/*" "*/") + +reserved :: String -> Parser String +reserved = try . string + +onetoken :: Parser (Tagged Tok) +onetoken = wrap (reserved "abstract") Abstract + <|> wrap (reserved "assert") Assert + <|> wrap (reserved "boolean") Boolean + <|> wrap (reserved "break") Break + <|> wrap (reserved "byte") Byte + <|> wrap (reserved "case") Case + <|> wrap (reserved "catch") Catch + <|> wrap (reserved "char") Char + <|> wrap (reserved "class") Class + <|> wrap (reserved "const") Const + <|> wrap (reserved "continue") Continue + <|> wrap (reserved "default") Default + <|> wrap (reserved "do") Do + <|> wrap (reserved "double") Double + <|> wrap (reserved "else") Else + <|> wrap (reserved "extends") Extends + <|> wrap (reserved "finally") Finally + <|> wrap (reserved "float") Float + <|> wrap (reserved "for") For + <|> wrap (reserved "goto") Goto + <|> wrap (reserved "if") If + <|> wrap (reserved "implements") Implements + <|> wrap (reserved "import") Import + <|> wrap (reserved "instanceof") InstanceOf + <|> wrap (reserved "int") Int + <|> wrap (reserved "interface") Interface + <|> wrap (reserved "long") Long + <|> wrap (reserved "native") Native + <|> wrap (reserved "new") New + <|> wrap (reserved "package") Package + <|> wrap (reserved "private") Private + <|> wrap (reserved "protected") Protected + <|> wrap (reserved "public") Public + <|> wrap (reserved "return") Return + <|> wrap (reserved "static") Static + <|> wrap (reserved "strictfp") StrictFP + <|> wrap (reserved "super") Super + <|> wrap (reserved "synchronized") Synchronized + <|> wrap (reserved "switch") Switch + <|> wrap (reserved "this") This + <|> wrap (reserved "throw") Throw + <|> wrap (reserved "throws") Throws + <|> wrap (reserved "transient") Transient + <|> wrap (reserved "try") Try + <|> wrap (reserved "void") Void + <|> wrap (reserved "volatile") Volatile + <|> wrap (reserved "while") While + <|> wrap ident Identifier + <|> wrap (show <$> L.integer) IntegerLiteral + <|> wrap (show <$> L.float) FloatLiteral + <|> wrap (show <$> strLit) StringLiteral + <|> wrap (show <$> charLit) CharLiteral + <|> wrap (reserved ">>=") RightAssign + <|> wrap (reserved "<<=") LeftAssign + <|> wrap (reserved ">>>") ThreeRight + <|> wrap (reserved "+=") AddAssign + <|> wrap (reserved "-=") SubAssign + <|> wrap (reserved "*=") MulAssign + <|> wrap (reserved "/=") DivAssign + <|> wrap (reserved "%=") ModAssign + <|> wrap (reserved "&=") AndAssign + <|> wrap (reserved "^=") XorAssign + <|> wrap (reserved "|=") OrAssign + <|> wrap (reserved ">>") RightOp + <|> wrap (reserved "<<") LeftOp + <|> wrap (reserved "++") IncOp + <|> wrap (reserved "--") DecOp + <|> wrap (reserved "&&") AndOp + <|> wrap (reserved "||") OrOp + <|> wrap (reserved "<=") LeOp + <|> wrap (reserved ">=") GeOp + <|> wrap (reserved "==") EqOp + <|> wrap (reserved "!=") NeOp + <|> wrap (reserved ";") Semicolon + <|> wrap (reserved "{") LeftCurly + <|> wrap (reserved "}") RightCurly + <|> wrap (reserved ",") Comma + <|> wrap (reserved ":") Colon + <|> wrap (reserved "=") Equal + <|> wrap (reserved "(") LeftParen + <|> wrap (reserved ")") RightParen + <|> wrap (reserved "[") LeftSquare + <|> wrap (reserved "]") RightSquare + <|> wrap (reserved ".") Dot + <|> wrap (reserved "&") Ampersand + <|> wrap (reserved "!") Exclamation + <|> wrap (reserved "~") Tilde + <|> wrap (reserved "-") Minus + <|> wrap (reserved "+") Plus + <|> wrap (reserved "*") Asterisk + <|> wrap (reserved "/") Slash + <|> wrap (reserved "%") Percent + <|> wrap (reserved "<") LessThan + <|> wrap (reserved ">") GreaterThan + <|> wrap (reserved "^") Caret + <|> wrap (reserved "|") Pipe + <|> wrap (reserved "?") Question + <|> wrap ((:[]) <$> anyChar) Unknown + +lex :: Lexer Tok +lex p d = case runParser (many (sc *> onetoken <* sc)) p d of + Left e -> throwError $ LexError e + Right t -> return t diff --git a/src/Lichen/Lexer/Python.hs b/src/Lichen/Lexer/Python.hs index 42599a24..0c928838 100644 --- a/src/Lichen/Lexer/Python.hs +++ b/src/Lichen/Lexer/Python.hs @@ -37,9 +37,6 @@ sc = L.space (void spaceChar) (L.skipLineComment "#") (L.skipBlockComment "\"\"\ reserved :: String -> Parser String reserved = try . string -pyStrLit :: Parser String -pyStrLit = char '\'' *> manyTill L.charLiteral (char '\'') - onetoken :: Parser (Tagged Tok) onetoken = wrap (reserved "False") Lichen.Lexer.Python.False <|> wrap (reserved "None") None @@ -77,27 +74,39 @@ onetoken = wrap (reserved "False") Lichen.Lexer.Python.False <|> wrap ident Identifier <|> wrap (show <$> L.integer) IntegerLiteral <|> wrap (show <$> L.float) FloatLiteral - <|> wrap (show <$> (strLit <|> pyStrLit)) StringLiteral - <|> wrap (show <$> (char 'b' *> (strLit <|> pyStrLit))) BytesLiteral + <|> wrap (show <$> (strLit <|> charLit)) StringLiteral + <|> wrap (show <$> (char 'b' *> (strLit <|> charLit))) BytesLiteral + <|> wrap (reserved "**=") PowAssign + <|> wrap (reserved "//=") IntDivAssign + <|> wrap (reserved "<<=") LeftAssign + <|> wrap (reserved ">>=") RightAssign + <|> wrap (reserved "+=") AddAssign + <|> wrap (reserved "-=") SubAssign + <|> wrap (reserved "*=") MulAssign + <|> wrap (reserved "/=") DivAssign + <|> wrap (reserved "%=") ModAssign + <|> wrap (reserved "&=") AndAssign + <|> wrap (reserved "|=") OrAssign + <|> wrap (reserved "^=") XorAssign + <|> wrap (reserved "**") DoubleAsterisk + <|> wrap (reserved "//") DoubleSlash + <|> wrap (reserved "==") EqOp + <|> wrap (reserved "!=") NeOp + <|> wrap (reserved "<=") LeOp + <|> wrap (reserved ">=") GeOp + <|> wrap (reserved "<<") LeftOp + <|> wrap (reserved ">>") RightOp <|> wrap (reserved "+") Plus <|> wrap (reserved "-") Minus <|> wrap (reserved "*") Asterisk <|> wrap (reserved "/") Slash - <|> wrap (reserved "//") DoubleSlash <|> wrap (reserved "%") Percent - <|> wrap (reserved "**") DoubleAsterisk - <|> wrap (reserved "==") EqOp - <|> wrap (reserved "!=") NeOp <|> wrap (reserved "<") LessThan <|> wrap (reserved ">") GreaterThan - <|> wrap (reserved "<=") LeOp - <|> wrap (reserved ">=") GeOp <|> wrap (reserved "&") Ampersand <|> wrap (reserved "|") Pipe <|> wrap (reserved "~") Tilde <|> wrap (reserved "^") Caret - <|> wrap (reserved "<<") LeftOp - <|> wrap (reserved ">>") RightOp <|> wrap (reserved "(") LeftParen <|> wrap (reserved ")") RightParen <|> wrap (reserved "[") LeftSquare @@ -110,19 +119,7 @@ onetoken = wrap (reserved "False") Lichen.Lexer.Python.False <|> wrap (reserved ";") Semicolon <|> wrap (reserved "@") At <|> wrap (reserved "=") Equal - <|> wrap (reserved "+=") AddAssign - <|> wrap (reserved "-=") SubAssign - <|> wrap (reserved "*=") MulAssign - <|> wrap (reserved "/=") DivAssign - <|> wrap (reserved "//=") IntDivAssign - <|> wrap (reserved "%=") ModAssign - <|> wrap (reserved "**=") PowAssign - <|> wrap (reserved "&=") AndAssign - <|> wrap (reserved "|=") OrAssign - <|> wrap (reserved "^=") XorAssign - <|> wrap (reserved "<<=") LeftAssign - <|> wrap (reserved ">>=") RightAssign - <|> wrap ((:[]) <$> L.charLiteral) Unknown + <|> wrap ((:[]) <$> anyChar) Unknown lex :: Lexer Tok lex p d = case runParser (many (sc *> onetoken <* sc)) p d of diff --git a/src/Lichen/Plagiarism/AssignmentSettings.hs b/src/Lichen/Plagiarism/AssignmentSettings.hs index 3e617e3b..4891fae4 100644 --- a/src/Lichen/Plagiarism/AssignmentSettings.hs +++ b/src/Lichen/Plagiarism/AssignmentSettings.hs @@ -11,7 +11,7 @@ import qualified Data.ByteString.Lazy as BS import Control.Monad.Except import Lichen.Error -import Lichen.Config.Plagiarism +import Lichen.Plagiarism.Config data VersionTime = VersionTime { version :: Int, time :: String } instance FromJSON VersionTime where parseJSON = withObject "version_time" $ \o -> VersionTime <$> o .: "version" <*> o .: "time" diff --git a/src/Lichen/Plagiarism/Concatenate.hs b/src/Lichen/Plagiarism/Concatenate.hs index 78b87e7a..6552e90c 100644 --- a/src/Lichen/Plagiarism/Concatenate.hs +++ b/src/Lichen/Plagiarism/Concatenate.hs @@ -14,8 +14,8 @@ import Control.Arrow (second) import Text.Read import Lichen.Util -import Lichen.Config.Languages -import Lichen.Config.Plagiarism +import Lichen.Languages +import Lichen.Plagiarism.Config import Lichen.Plagiarism.AssignmentSettings -- Given a student submission directory, parse the students diff --git a/src/Lichen/Config/Plagiarism.hs b/src/Lichen/Plagiarism/Config.hs similarity index 98% rename from src/Lichen/Config/Plagiarism.hs rename to src/Lichen/Plagiarism/Config.hs index c5641599..e7ff1951 100644 --- a/src/Lichen/Config/Plagiarism.hs +++ b/src/Lichen/Plagiarism/Config.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings, RecordWildCards #-} -module Lichen.Config.Plagiarism where +module Lichen.Plagiarism.Config where import Data.Maybe import Data.Aeson @@ -9,7 +9,7 @@ import qualified Data.Text as T import qualified Text.Blaze.Html5 as H import Lichen.Config -import Lichen.Config.Languages +import Lichen.Languages newtype PathGenerator = PathGenerator { runPathGenerator :: Config -> String -> String -> H.AttributeValue } instance FromJSON PathGenerator where diff --git a/src/Lichen/Plagiarism/Highlight.hs b/src/Lichen/Plagiarism/Highlight.hs index ab07d84f..a0824163 100644 --- a/src/Lichen/Plagiarism/Highlight.hs +++ b/src/Lichen/Plagiarism/Highlight.hs @@ -8,7 +8,7 @@ import Control.Monad.Reader import Control.Arrow ((&&&)) import Lichen.Util -import Lichen.Config.Plagiarism +import Lichen.Plagiarism.Config highlight :: FilePath -> Plagiarism () highlight p = do diff --git a/src/Lichen/Plagiarism/Main.hs b/src/Lichen/Plagiarism/Main.hs index 487548d7..9f3b29b6 100644 --- a/src/Lichen/Plagiarism/Main.hs +++ b/src/Lichen/Plagiarism/Main.hs @@ -19,8 +19,8 @@ import Options.Applicative import Lichen.Util import Lichen.Error import Lichen.Config -import Lichen.Config.Languages -import Lichen.Config.Plagiarism +import Lichen.Languages +import Lichen.Plagiarism.Config import Lichen.Plagiarism.Concatenate import Lichen.Plagiarism.Highlight import Lichen.Plagiarism.Report diff --git a/src/Lichen/Plagiarism/Render.hs b/src/Lichen/Plagiarism/Render.hs index 81f2367f..1563d24d 100644 --- a/src/Lichen/Plagiarism/Render.hs +++ b/src/Lichen/Plagiarism/Render.hs @@ -18,7 +18,7 @@ import qualified Clay.Font as C.F import Language.Javascript.JMacro import Lichen.Util -import Lichen.Config.Plagiarism +import Lichen.Plagiarism.Config hs :: Show a => a -> H.Html hs = H.toHtml . sq diff --git a/src/Lichen/Plagiarism/Render/Index.hs b/src/Lichen/Plagiarism/Render/Index.hs index 04e153fe..75023094 100644 --- a/src/Lichen/Plagiarism/Render/Index.hs +++ b/src/Lichen/Plagiarism/Render/Index.hs @@ -12,7 +12,7 @@ import qualified Text.Blaze.Html5.Attributes as A import Numeric import Lichen.Util -import Lichen.Config.Plagiarism +import Lichen.Plagiarism.Config import Lichen.Plagiarism.Render renderEntry :: Show a => Config -> (Double, (b, a), (b, a)) -> H.Html diff --git a/src/Lichen/Plagiarism/Report.hs b/src/Lichen/Plagiarism/Report.hs index 6ba190e0..f99c722f 100644 --- a/src/Lichen/Plagiarism/Report.hs +++ b/src/Lichen/Plagiarism/Report.hs @@ -12,7 +12,7 @@ import Control.Monad.Reader import Text.Blaze.Html.Renderer.Utf8 import Lichen.Util -import Lichen.Config.Plagiarism +import Lichen.Plagiarism.Config import Lichen.Plagiarism.Winnow import Lichen.Plagiarism.Compare import Lichen.Plagiarism.Render diff --git a/src/Lichen/Plagiarism/Shared.hs b/src/Lichen/Plagiarism/Shared.hs index 3dec0e58..6a5dd526 100644 --- a/src/Lichen/Plagiarism/Shared.hs +++ b/src/Lichen/Plagiarism/Shared.hs @@ -3,7 +3,7 @@ module Lichen.Plagiarism.Shared where import qualified Data.Set as Set import qualified Data.Map.Strict as Map -import Lichen.Config.Plagiarism +import Lichen.Plagiarism.Config import Lichen.Plagiarism.Winnow findShared :: Config -> [Fingerprints] -> [Fingerprints] -> Set.Set Fingerprint diff --git a/src/Lichen/Plagiarism/Walk.hs b/src/Lichen/Plagiarism/Walk.hs index c1d379d1..57f87cdc 100644 --- a/src/Lichen/Plagiarism/Walk.hs +++ b/src/Lichen/Plagiarism/Walk.hs @@ -7,8 +7,8 @@ import qualified Data.ByteString as BS import Control.Monad.Reader -import Lichen.Config.Languages -import Lichen.Config.Plagiarism +import Lichen.Languages +import Lichen.Plagiarism.Config import Lichen.Plagiarism.Winnow -- Given language configuration and the path to a directory containing diff --git a/src/Lichen/Plagiarism/Winnow.hs b/src/Lichen/Plagiarism/Winnow.hs index 39430a8e..e5e46b98 100644 --- a/src/Lichen/Plagiarism/Winnow.hs +++ b/src/Lichen/Plagiarism/Winnow.hs @@ -6,9 +6,9 @@ import qualified Data.ByteString as BS import Control.Monad.Trans +import Lichen.Languages import Lichen.Lexer -import Lichen.Config.Languages -import Lichen.Config.Plagiarism +import Lichen.Plagiarism.Config type Fingerprint = Tagged Int type Fingerprints = [Fingerprint] @@ -86,4 +86,4 @@ processTokens config = winnow (signalThreshold config) (noiseThreshold config) -- Cannot use record syntax here due to type variable selection processCode :: Language -> FilePath -> BS.ByteString -> Plagiarism Fingerprints -processCode (Language _ llex c _ _) p src = lift $ processTokens c <$> llex p src +processCode (Language _ c _ l _) p src = lift $ processTokens c <$> l p src diff --git a/src/Lichen/Util.hs b/src/Lichen/Util.hs index 273bdbe5..af1e1983 100644 --- a/src/Lichen/Util.hs +++ b/src/Lichen/Util.hs @@ -33,6 +33,7 @@ purifySnd ((x, Just y):xs) = (x, y):purifySnd xs -- Ex: containingDir "/usr/bin/gcc" = "/usr/bin" containingDir :: FilePath -> FilePath +containingDir [] = [] containingDir p = (if head p == '/' then ('/':) else id) . foldr1 () . init . splitOn "/" $ p removeDir :: FilePath -> IO () @@ -67,11 +68,21 @@ sq = go . show where | otherwise = s go x = x +printColor :: MonadIO m => Color -> m () -> m () +printColor c body = liftIO (hSetSGR stderr [SetColor Foreground Vivid c]) >> body >> liftIO (hSetSGR stderr [Reset]) + +notify :: MonadIO m => m () -> m () +notify = printColor Green + +warn :: MonadIO m => m () -> m () +warn = printColor Yellow + +err :: MonadIO m => m () -> m () +err = printColor Red + progress :: MonadIO m => String -> m a -> m a progress msg body = do - liftIO (hPutStr stderr (msg <> "...")) + liftIO . hPutStr stderr $ msg <> "... " ret <- body - liftIO (hSetSGR stderr [SetColor Foreground Vivid Green]) - liftIO (hPutStrLn stderr " Done!") - liftIO (hSetSGR stderr [Reset]) + liftIO . notify $ hPutStrLn stderr "Done!" return ret