Skip to content

Commit

Permalink
Miscellaneous Fixes (#55)
Browse files Browse the repository at this point in the history
* Fix lexer bugs, usability updates, structural refactor, improved error messages

* Add Java lexer
  • Loading branch information
chameco authored and bmcutler committed Oct 11, 2017
1 parent 82249bd commit c497bf4
Show file tree
Hide file tree
Showing 29 changed files with 258 additions and 92 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ deploy:
file:
- count
- plagiarism
- diagnostics
skip_cleanup: true
on:
tags: true
Expand Down
5 changes: 2 additions & 3 deletions app/count/Main.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import Lichen.Config.Languages
import Lichen.Config.Count
import Lichen.Count.Config
import Lichen.Count.Main

main :: IO ()
main = realMain $ defaultConfig { language = langC }
main = realMain defaultConfig
2 changes: 1 addition & 1 deletion app/diagnostics/Main.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import Lichen.Config.Diagnostics
import Lichen.Diagnostics.Config
import Lichen.Diagnostics.Main

main :: IO ()
Expand Down
5 changes: 2 additions & 3 deletions app/plagiarism/Main.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import Lichen.Config.Languages
import Lichen.Config.Plagiarism
import Lichen.Plagiarism.Config
import Lichen.Plagiarism.Main

main :: IO ()
main = realMain $ defaultConfig { language = langC }
main = realMain defaultConfig
2 changes: 1 addition & 1 deletion build.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/sh

VERSION=$1
BINARIES="count plagiarism"
BINARIES="count plagiarism diagnostics"

if [ -z $VERSION ]
then VERSION="latest"
Expand Down
18 changes: 12 additions & 6 deletions lichen.cabal
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: lichen
version: 0.2.7
version: 0.3.0
synopsis: Plagiarism Detection and Other Static Analysis
description: Please see README.md
homepage: https://github.com/Submitty/AnalysisTools
Expand All @@ -14,17 +14,19 @@ library
hs-source-dirs: src
exposed-modules: Lichen.Util
, Lichen.Error
, Lichen.Languages
, Lichen.Config
, Lichen.Config.Languages
, Lichen.Config.Plagiarism
, Lichen.Config.Count
, Lichen.Config.Diagnostics

, Lichen.Lexer
, Lichen.Lexer.C
, Lichen.Lexer.Python
, Lichen.Lexer.Java

, Lichen.Parser
, Lichen.Parser.Python

, Lichen.Plagiarism.Main
, Lichen.Plagiarism.Config
, Lichen.Plagiarism.Winnow
, Lichen.Plagiarism.Compare
, Lichen.Plagiarism.Walk
Expand All @@ -36,9 +38,13 @@ library
, Lichen.Plagiarism.Render
, Lichen.Plagiarism.Render.Index
, Lichen.Plagiarism.Render.Compare

, Lichen.Count.Main
, Lichen.Count.Config
, Lichen.Count.Counters

, Lichen.Diagnostics.Main
, Lichen.Diagnostics.Config
build-depends: base >= 4.7 && < 5
, containers
, split
Expand All @@ -60,7 +66,7 @@ library
, jmacro
, QuickCheck
, hspec
ghc-options: -O2 -Wall -fwarn-incomplete-patterns -fno-warn-orphans
ghc-options: -O2 -Wall
default-language: Haskell2010

executable plagiarism
Expand Down
4 changes: 2 additions & 2 deletions src/Lichen/Config/Count.hs → src/Lichen/Count/Config.hs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{-# LANGUAGE OverloadedStrings, RecordWildCards #-}

module Lichen.Config.Count where
module Lichen.Count.Config where

import Data.Maybe
import Data.Aeson

import Lichen.Config
import Lichen.Config.Languages
import Lichen.Languages
import Lichen.Count.Counters

data Config = Config
Expand Down
4 changes: 2 additions & 2 deletions src/Lichen/Count/Counters.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import qualified Data.ByteString as BS
import Control.Monad.Except

import Lichen.Error
import Lichen.Config.Languages
import Lichen.Languages
import Lichen.Lexer
import Lichen.Util
import qualified Lichen.Parser as P
Expand All @@ -24,7 +24,7 @@ counterDummy :: Counter
counterDummy = Counter $ \_ _ _ -> throwError $ InvocationError "Invalid counting method specified"

counterToken :: Counter
counterToken = Counter $ \(Language _ l _ readTok _) t p -> do
counterToken = Counter $ \(Language _ _ readTok l _) t p -> do
ssrc <- liftIO $ readSafe (liftIO . BS.readFile) (throwError $ InvocationError "File not found") p
src <- ssrc
tokens <- l p src
Expand Down
4 changes: 2 additions & 2 deletions src/Lichen/Count/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ import Options.Applicative
import Lichen.Util
import Lichen.Error
import Lichen.Config
import Lichen.Config.Languages
import Lichen.Config.Count
import Lichen.Languages
import Lichen.Count.Config
import Lichen.Count.Counters

parseOptions :: Config -> Parser Config
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{-# LANGUAGE OverloadedStrings, RecordWildCards #-}

module Lichen.Config.Diagnostics where
module Lichen.Diagnostics.Config where

import Data.Maybe
import Data.Aeson

import Lichen.Config
import Lichen.Config.Languages
import Lichen.Languages

data Config = Config
{ dataDir :: FilePath
Expand Down
8 changes: 4 additions & 4 deletions src/Lichen/Diagnostics/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@ import Options.Applicative
import Lichen.Util
import Lichen.Error
import Lichen.Config
import Lichen.Config.Languages
import Lichen.Config.Diagnostics
import Lichen.Languages
import Lichen.Diagnostics.Config
import Lichen.Plagiarism.Concatenate
import Lichen.Plagiarism.Compare
import Lichen.Plagiarism.Walk
import Lichen.Plagiarism.Winnow
import Lichen.Plagiarism.AssignmentSettings
import qualified Lichen.Config.Plagiarism as Plagiarism
import qualified Lichen.Plagiarism.Config as Plagiarism

compareTag :: String -> [(Fingerprints, String)] -> [(Double, (Fingerprints, String), (Fingerprints, String))]
compareTag t prints = case ours of Just x -> curry compareFingerprints x <$> prints
Expand All @@ -40,7 +40,7 @@ compareTag t prints = case ours of Just x -> curry compareFingerprints x <$> pri
ours = tagged prints

diagnosticsToken :: Language -> FilePath -> Erring Pair
diagnosticsToken (Language _ l _ _ _) p = do
diagnosticsToken (Language _ _ _ l _) p = do
src <- liftIO $ BS.readFile p
tokens <- l p src
return $ T.pack p .= toJSON tokens
Expand Down
12 changes: 7 additions & 5 deletions src/Lichen/Error.hs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import Text.Megaparsec.Error (ParseError, parseErrorPretty)

import Control.Monad.Except (ExceptT)

import Lichen.Util

type Erring = ExceptT LichenError IO

data LichenError = LexError (ParseError (Token BS.ByteString) Dec)
Expand All @@ -24,8 +26,8 @@ data LichenError = LexError (ParseError (Token BS.ByteString) Dec)
deriving Show

printError :: LichenError -> IO ()
printError (LexError e) = T.IO.hPutStrLn stderr "Lexer error: " >> hPutStrLn stderr (parseErrorPretty e)
printError (ParseError t) = T.IO.hPutStrLn stderr ("Parser error: " <> t)
printError (InvalidTokenError t) = T.IO.hPutStrLn stderr ("Invalid token error: " <> t)
printError (InvocationError t) = T.IO.hPutStrLn stderr ("Invocation error: " <> t)
printError (JSONDecodingError t) = T.IO.hPutStrLn stderr ("JSON decoding error: " <> t)
printError (LexError e) = err $ T.IO.hPutStrLn stderr "Lexer error: " >> hPutStrLn stderr (parseErrorPretty e)
printError (ParseError t) = err $ T.IO.hPutStrLn stderr ("Parser error: " <> t)
printError (InvalidTokenError t) = err $ T.IO.hPutStrLn stderr ("Invalid token error: " <> t)
printError (InvocationError t) = err $ T.IO.hPutStrLn stderr ("Invocation error: " <> t)
printError (JSONDecodingError t) = err $ T.IO.hPutStrLn stderr ("JSON decoding error: " <> t)
20 changes: 13 additions & 7 deletions src/Lichen/Config/Languages.hs → src/Lichen/Languages.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{-# LANGUAGE OverloadedStrings, GADTs, DeriveGeneric, StandaloneDeriving #-}

module Lichen.Config.Languages where
module Lichen.Languages where

import GHC.Generics

Expand All @@ -17,6 +17,7 @@ import Lichen.Lexer
import Lichen.Parser
import qualified Lichen.Lexer.C as C
import qualified Lichen.Lexer.Python as Python
import qualified Lichen.Lexer.Java as Java
import qualified Lichen.Parser.Python as Python

-- Configuration for the winnowing algorithm. Token sequences shorter than
Expand All @@ -35,30 +36,33 @@ instance FromJSON WinnowConfig
-- is an HLint bug, and can safely be ignored.
data Language where
Language :: (Hashable a, Show a) => { exts :: [FilePath]
, lexer :: Lexer a
, winnowConfig :: WinnowConfig
, readToken :: String -> Erring a
, lexer :: Lexer a
, parser :: Parser Node
} -> Language
instance FromJSON Language where
parseJSON (String s) = pure $ languageChoice langDummy (Just $ T.unpack s)
parseJSON _ = pure langDummy

dummy :: a -> b -> Erring c
dummy _ _ = throwError $ InvocationError "Specified analysis method is undefined for language"
dummy :: T.Text -> a -> b -> Erring c
dummy t _ _ = throwError $ InvocationError t

smartRead :: Read a => String -> Erring a
smartRead s = case readMaybe s of Just t -> pure t
Nothing -> throwError . InvalidTokenError $ T.pack s

langDummy :: Language
langDummy = Language [] dummy (WinnowConfig 0 0) (const $ pure ()) dummy
langDummy = Language [] (WinnowConfig 0 0) (const $ pure ()) (dummy "No valid language specified") (dummy "No valid language specified")

langC :: Language
langC = Language [".c", ".h", ".cpp", ".hpp", ".C", ".H", ".cc"] C.lex (WinnowConfig 16 9) (smartRead :: String -> Erring C.Tok) dummy
langC = Language [".c", ".h", ".cpp", ".hpp", ".C", ".H", ".cc"] (WinnowConfig 16 9) (smartRead :: String -> Erring C.Tok) C.lex (dummy "The C tooling does not currently support the requested feature")

langPython :: Language
langPython = Language [".py"] Python.lex (WinnowConfig 16 9) (smartRead :: String -> Erring Python.Tok) Python.parse
langPython = Language [".py"] (WinnowConfig 16 9) (smartRead :: String -> Erring Python.Tok) Python.lex Python.parse

langJava :: Language
langJava = Language [".java"] (WinnowConfig 16 9) (smartRead :: String -> Erring Java.Tok) Java.lex (dummy "The Java tooling does not currently support the requested feature")

languageChoice :: Language -> Maybe String -> Language
languageChoice d Nothing = d
Expand All @@ -67,4 +71,6 @@ languageChoice _ (Just "c") = langC
languageChoice _ (Just "Python") = langPython
languageChoice _ (Just "python") = langPython
languageChoice _ (Just "py") = langPython
languageChoice _ (Just "java") = langJava
languageChoice _ (Just "Java") = langJava
languageChoice _ _ = langDummy
4 changes: 2 additions & 2 deletions src/Lichen/Lexer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ wrap p x = do
return . Tagged x $ TokPos (sourceLine pos) (sourceLine pos) (sourceColumn pos) (sourceColumn pos <> unsafePos (fromIntegral $ length s))

-- Parse a C-style character literal. Ex: 'a', '@'.
charLit :: Parser Char
charLit = char '\'' *> L.charLiteral <* char '\''
charLit :: Parser String
charLit = char '\'' *> manyTill L.charLiteral (char '\'')

-- Parse a C-style string literal. Ex: "a", "hello, world".
strLit :: Parser String
Expand Down
9 changes: 4 additions & 5 deletions src/Lichen/Lexer/C.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@ module Lichen.Lexer.C where
import GHC.Generics (Generic)

import Control.Monad
--import Control.Monad.Except
import Control.Monad.Except

import Data.Hashable

import Text.Megaparsec
import Text.Megaparsec.ByteString
import qualified Text.Megaparsec.Lexer as L

--import Lichen.Error
import Lichen.Error
import Lichen.Lexer

data Tok = Auto | Break | Case | Char | Const | Continue | Default | Do
Expand Down Expand Up @@ -137,10 +137,9 @@ onetoken = wrap (reserved "auto") Auto
<|> wrap (reserved "^") Caret
<|> wrap (reserved "|") Pipe
<|> wrap (reserved "?") Question
<|> wrap ((:[]) <$> L.charLiteral) Unknown
<|> wrap ((:[]) <$> anyChar) Unknown

lex :: Lexer Tok
lex p d = case runParser (many (sc *> onetoken <* sc)) p d of
--Left e -> throwError $ LexError e
Left _ -> return []
Left e -> throwError $ LexError e
Right t -> return t
Loading

0 comments on commit c497bf4

Please sign in to comment.