From b48c99d8b82885bfaf3d718c57e80457ec320727 Mon Sep 17 00:00:00 2001 From: Daniel Lidstrom Date: Fri, 29 May 2020 11:02:45 +0200 Subject: [PATCH 1/4] Small docker image --- Dockerfile | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..626ab2b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,26 @@ +# Build stage: compile duplo from source with the Alpine toolchain. +FROM alpine:3.11 AS build +ARG DUPLO_VERSION=v1.0.0 + +RUN apk --no-cache add \ + alpine-sdk cmake + +RUN mkdir -p /usr/src/ && \ + git clone https://github.com/dlidstrom/Duplo /usr/src/Duplo + +# Build out of source; WORKDIR creates the directory if it does not exist. +WORKDIR /usr/src/Duplo/build + +RUN cmake .. -DDUPLO_VERSION=\"$DUPLO_VERSION\" && \ + make + +# Runtime stage: Alpine base plus libstdc++ and the duplo binary copied from the build stage. +FROM alpine:3.11 + +RUN apk --no-cache add libstdc++ + +COPY --from=build /usr/src/Duplo/build/duplo /duplo + +# Absolute path: a relative "./duplo" stops resolving once the caller changes +# the working directory (e.g. docker run -w /data). +ENTRYPOINT ["/duplo"] From 3845533ac5fc00903d5f1387467f7bfbcc86a4c4 Mon Sep 17 00:00:00 2001 From: Daniel Lidstrom Date: Fri, 29 May 2020 11:14:38 +0200 Subject: [PATCH 2/4] Build and push docker image --- .github/workflows/ccpp.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index 81fc339..eb57045 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -72,6 +72,23 @@ jobs: with: name: uploads path: duplo-macos.zip + push-docker-image: + runs-on: ubuntu-latest + needs: [bump-tag-dry] + - name: download artifacts + uses: actions/download-artifact@v1 + with: + name: uploads + - name: set version + run: export DUPLO_VERSION=`cat ./uploads/tag.txt` + - uses: docker/build-push-action@v1 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + repository: dlidstrom/duplo + tag_with_ref: true + # push: ${{ startsWith(github.ref, 'refs/tags/') }} + build_args: DUPLO_VERSION=$DUPLO_VERSION upload-release: if: success() && github.ref == 'refs/heads/master' runs-on: ubuntu-latest From 
ff7e750d18408804e5485e2aeba4f22ee7f8be68 Mon Sep 17 00:00:00 2001 From: Daniel Lidstrom Date: Fri, 29 May 2020 12:46:14 +0200 Subject: [PATCH 3/4] Build and push image --- .github/workflows/ccpp.yml | 11 +- README.markdown | 221 ------------------------------------- 2 files changed, 9 insertions(+), 223 deletions(-) delete mode 100644 README.markdown diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index eb57045..4f3d475 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -75,12 +75,19 @@ jobs: push-docker-image: runs-on: ubuntu-latest needs: [bump-tag-dry] + steps: - name: download artifacts uses: actions/download-artifact@v1 with: name: uploads - name: set version - run: export DUPLO_VERSION=`cat ./uploads/tag.txt` + # Append to the GITHUB_ENV file so later steps see DUPLO_VERSION; the set-env workflow command is disabled by GitHub. + run: echo "DUPLO_VERSION=$(cat ./uploads/tag.txt)" >> "$GITHUB_ENV" + - name: print version + run: echo $DUPLO_VERSION + - uses: actions/checkout@master + with: + fetch-depth: '0' - uses: docker/build-push-action@v1 with: username: ${{ secrets.DOCKER_USERNAME }} @@ -88,7 +95,7 @@ jobs: repository: dlidstrom/duplo tag_with_ref: true # push: ${{ startsWith(github.ref, 'refs/tags/') }} - build_args: DUPLO_VERSION=$DUPLO_VERSION + build_args: DUPLO_VERSION=${{ env.DUPLO_VERSION }} upload-release: if: success() && github.ref == 'refs/heads/master' runs-on: ubuntu-latest diff --git a/README.markdown b/README.markdown deleted file mode 100644 index 60d0403..0000000 --- a/README.markdown +++ /dev/null @@ -1,221 +0,0 @@ -# 1. Duplo (C/C++/Java Duplicate Source Code Block Finder) - -![C/C++ CI](https://github.com/dlidstrom/Duplo/workflows/C/C++%20CI/badge.svg) - -- [1. Duplo (C/C++/Java Duplicate Source Code Block Finder)](#1-duplo-ccjava-duplicate-source-code-block-finder) - - [1.1. General Information](#11-general-information) - - [1.2. Maintainer](#12-maintainer) - - [1.3. File Format Support](#13-file-format-support) - - [1.4. Installation](#14-installation) - - [1.5. Usage](#15-usage) - - [1.5.1. 
Passing files using `stdin`](#151-passing-files-using-stdin) - - [1.5.2. Passing files using file](#152-passing-files-using-file) - - [1.5.3. Xml output](#153-xml-output) - - [1.6. Feedback and Bug Reporting](#16-feedback-and-bug-reporting) - - [1.7. Algorithm Background](#17-algorithm-background) - - [1.7.1. Performance Measurements](#171-performance-measurements) - - [1.8. Developing](#18-developing) - - [1.8.1. Unix](#181-unix) - - [1.8.2. Windows](#182-windows) - - [1.8.3. Additional Language Support](#183-additional-language-support) - - [1.8.4. Language Suggestions](#184-language-suggestions) - - [1.9. Changes](#19-changes) - - [1.10. License](#110-license) - -## 1.1. General Information - -Duplicated source code blocks can harm maintainability of software systems. -Duplo is a tool to find duplicated code blocks in large C, C++, Java, C# and -VB.Net systems. - -## 1.2. Maintainer - -Duplo was originally developed by Christian -M. Ammann and is now maintained and developed by Daniel -Lidström. - -## 1.3. File Format Support - -Duplo has built in support for the following -file formats: - -- C/C++ (.c, .cpp, .cxx, .h, .hpp) -- Java -- C# -- VB -- GCC assembly - -This means that Duplo will remove -preprocessor directives, block comments, using -statements, etc, to only consider duplicates -in actual code. -In addition, Duplo can be used as a general -(without special support) duplicates detector -in arbitrary text files and will even detect -duplicates found in the same file. - -Sample output snippet: - -```txt -... 
-src\engine\geometry\simple\TorusGeometry.cpp(56) -src\engine\geometry\simple\SphereGeometry.cpp(54) - pBuffer[currentIndex*size+3]=(i+1)/(float)subdsU; - pBuffer[currentIndex*size+4]=j/(float)subdsV; - currentIndex++; - pPrimitiveBuffer->unlock(); - -src\engine\geometry\subds\SubDsGeometry.cpp(37) -src\engine\geometry\SkinnedMeshGeometry.cpp(45) - pBuffer[i*size+0]=m_ct[0]->m_pColors[i*3]; - pBuffer[i*size+1]=m_ct[0]->m_pColors[i*3+1]; - pBuffer[i*size+2]=m_ct[0]->m_pColors[i*3+2]; -... -``` - -## 1.4. Installation - -Duplo is currently available prebuilt for linux and macos. Grab the executable from the [releases](https://github.com/dlidstrom/Duplo/releases) page. - -You can of course build from source as well, and you'll currently have to do so for Windows. - -## 1.5. Usage - -Duplo works with a list of files. You can either specify a file that contains the list of files, or you can pass them using `stdin`. - -Run `duplo --help` on the command line to see the detailed options. - -### 1.5.1. Passing files using `stdin` - -```bash -# unix -> find . \( -iname "*.cpp" -o -iname "*.h" \) | duplo - out.txt - -# windows -> Get-ChildItem -Include "*.cpp", "*.h" -Recurse | % { $_.FullName } | Duplo.exe - out.txt -``` - -`duplo` will write the duplicated blocks into `out.txt`. - -### 1.5.2. Passing files using file - -`duplo` can analyze files specified in a separate file: - -```bash -# unix -> find . -type f \( -iname "*.cpp" -o -iname "*.h" \) > files.lst -> duplo files.lst out.txt - -# windows -> Get-ChildItem -Include "*.cpp", "*.h" -Recurse | % { $_.FullName } | Out-File -encoding ascii files.lst -> Duplo.exe files.lst out.txt -``` - -### 1.5.3. Xml output - -Duplo can also output xml and there is a stylesheet that will format the result for viewing in a browser. This can be used as a report tab in your continuous integration tool (TeamCity, etc). - -## 1.6. Feedback and Bug Reporting - -Please open an issue to discuss feedback, -feature requests and bug reports. 
- -## 1.7. Algorithm Background - -Duplo uses the same techniques as Duploc to detect duplicated code blocks. See -[Duca99bCodeDuplication](http://scg.unibe.ch/archive/papers/Duca99bCodeDuplication.pdf) for -further information. - -### 1.7.1. Performance Measurements - -| System | Files | Loc's | Time | -|-|-|-|-| -| Quake2 | 266 | 102740 | 18sec | - -## 1.8. Developing - -### 1.8.1. Unix - -You need `CMake` and preferrably `fswatch` for the best experience. - -```bash -# build dependencies -/> brew install cmake -/> brew install fswatch -``` - -Compiling is best done using the continuous file watcher: - -```bash -# CMake builds in the build folder -/> mkdir build -/> pushd build -build/> cmake .. -# now issue make -build/> make -build/> popd -# continuous build can now be used in root folder -# (needs fswatch) -> ./watch.sh -``` - -### 1.8.2. Windows - -Use Visual Studio 2019 to open the included solution file (or try `CMake`). - -### 1.8.3. Additional Language Support - -Duplo can analyze all text files regardless of format, but it has special support for some programming languages (C++, C#, Java, for example). This allows Duplo to improve the duplication detection as it can ignore preprocessor directives and/or comments. - -To implement support for a new language, there are a couple of options (in order of complexity): - -1. Implement `FileTypeBase` which has support for handling comments and preprocessor directives. You just need to decide what is a comment. With this option you need to implement a couple of methods, one which is `CreateLineFilter`. This is to remove multiline comments. Look at `CstyleCommentsFilter` for an example. -2. Implement `IFileType` interface directly. This gives you the most freedom but also is the hardest option of course. - -### 1.8.4. Language Suggestions - -- JavaScript (easy, just look at the existing C-based ones) -- Ruby -- Python -- Perl -- PHP -- Rust -- F# -- Scala -- Haskell -- Erlang -- What else? 
- -Send me a pull request! - -## 1.9. Changes - -- 0.4 - - Significant performance improvements - - Using modern C++ techniques - - Modularized to simplify adding support of new file formats - - Can pass files using `stdin` -- 0.3 - - Updated links in html output to GitHub - - Support for gcc assembly (.s) - - Fixed minimum number of lines in analysis - - Fixed limitation of total number of lines of code - - Checking of arbitrary files - -## 1.10. License - -Duplo is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -Duplo is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with Duplo; if not, write to the Free Software -Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - ---- From 0aec33473bc57a1b3dcc06b7d212be0bb133d480 Mon Sep 17 00:00:00 2001 From: Daniel Lidstrom Date: Fri, 29 May 2020 12:46:25 +0200 Subject: [PATCH 4/4] Updated documentation --- Docker.md | 70 ++++++++++++++++ README.md | 235 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 305 insertions(+) create mode 100644 Docker.md create mode 100644 README.md diff --git a/Docker.md b/Docker.md new file mode 100644 index 0000000..27f01a1 --- /dev/null +++ b/Docker.md @@ -0,0 +1,70 @@ +# Duplo (C/C++/Java Duplicate Source Code Block Finder) + +This project is found in GitHub: [https://github.com/dlidstrom/Duplo](https://github.com/dlidstrom/Duplo). + +## General Information + +Duplicated source code blocks can harm maintainability of software systems. 
+Duplo is a tool to find duplicated code blocks in large C, C++, Java, C# and +VB.Net systems. + +## Maintainer + +Duplo was originally developed by Christian +M. Ammann and is now maintained and developed by Daniel +Lidström. + +## Usage + +Duplo works with a list of files. You can either specify a file that contains the list of files, or you can pass them using `stdin`. + +Run `duplo --help` on the command line to see the detailed options. + +### Passing files using `stdin` + +```bash +# unix +> find . \( -iname "*.cpp" -o -iname "*.h" \) | docker run -i dlidstrom/duplo - out.txt + +# windows +> Get-ChildItem -Include "*.cpp", "*.h" -Recurse | % { $_.FullName } | docker run -i dlidstrom/duplo - out.txt +``` + +`duplo` will write the duplicated blocks into `out.txt`. + +### Passing files using file + +`duplo` can analyze files specified in a separate file: + +```bash +# unix +> find . -type f \( -iname "*.cpp" -o -iname "*.h" \) > files.lst +> docker run dlidstrom/duplo files.lst out.txt + +# windows +> Get-ChildItem -Include "*.cpp", "*.h" -Recurse | % { $_.FullName } | Out-File -encoding ascii files.lst +> docker run dlidstrom/duplo files.lst out.txt +``` + +## Feedback and Bug Reporting + +Please open a GitHub issue to discuss feedback, +feature requests and bug reports. + +## License + +Duplo is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +Duplo is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with Duplo; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +--- diff --git a/README.md b/README.md new file mode 100644 index 0000000..7c967c6 --- /dev/null +++ b/README.md @@ -0,0 +1,235 @@ +# 1. Duplo (C/C++/Java Duplicate Source Code Block Finder) + +![C/C++ CI](https://github.com/dlidstrom/Duplo/workflows/C/C++%20CI/badge.svg) + +- [1. Duplo (C/C++/Java Duplicate Source Code Block Finder)](#1-duplo-ccjava-duplicate-source-code-block-finder) + - [1.1. General Information](#11-general-information) + - [1.2. Maintainer](#12-maintainer) + - [1.3. File Format Support](#13-file-format-support) + - [1.4. Installation](#14-installation) + - [1.4.1. Docker](#141-docker) + - [1.4.2. Pre-built binaries](#142-pre-built-binaries) + - [1.5. Usage](#15-usage) + - [1.5.1. Passing files using `stdin`](#151-passing-files-using-stdin) + - [1.5.2. Passing files using file](#152-passing-files-using-file) + - [1.5.3. Xml output](#153-xml-output) + - [1.6. Feedback and Bug Reporting](#16-feedback-and-bug-reporting) + - [1.7. Algorithm Background](#17-algorithm-background) + - [1.7.1. Performance Measurements](#171-performance-measurements) + - [1.8. Developing](#18-developing) + - [1.8.1. Unix](#181-unix) + - [1.8.2. Windows](#182-windows) + - [1.8.3. Additional Language Support](#183-additional-language-support) + - [1.8.4. Language Suggestions](#184-language-suggestions) + - [1.9. Changes](#19-changes) + - [1.10. License](#110-license) + +## 1.1. General Information + +Duplicated source code blocks can harm maintainability of software systems. +Duplo is a tool to find duplicated code blocks in large C, C++, Java, C# and +VB.Net systems. + +## 1.2. Maintainer + +Duplo was originally developed by Christian +M. Ammann and is now maintained and developed by Daniel +Lidström. + +## 1.3. 
File Format Support + +Duplo has built in support for the following +file formats: + +- C/C++ (.c, .cpp, .cxx, .h, .hpp) +- Java +- C# +- VB +- GCC assembly + +This means that Duplo will remove +preprocessor directives, block comments, using +statements, etc, to only consider duplicates +in actual code. +In addition, Duplo can be used as a general +(without special support) duplicates detector +in arbitrary text files and will even detect +duplicates found in the same file. + +Sample output snippet: + +```txt +... +src\engine\geometry\simple\TorusGeometry.cpp(56) +src\engine\geometry\simple\SphereGeometry.cpp(54) + pBuffer[currentIndex*size+3]=(i+1)/(float)subdsU; + pBuffer[currentIndex*size+4]=j/(float)subdsV; + currentIndex++; + pPrimitiveBuffer->unlock(); + +src\engine\geometry\subds\SubDsGeometry.cpp(37) +src\engine\geometry\SkinnedMeshGeometry.cpp(45) + pBuffer[i*size+0]=m_ct[0]->m_pColors[i*3]; + pBuffer[i*size+1]=m_ct[0]->m_pColors[i*3+1]; + pBuffer[i*size+2]=m_ct[0]->m_pColors[i*3+2]; +... +``` + +## 1.4. Installation + +### 1.4.1. Docker + +If you have Docker, the easiest way to run Duplo is to: + +```bash +> docker run dlidstrom/duplo +``` + +This pulls the latest image and runs duplo. In the usage section below, use this command in place of `duplo` or `Duplo.exe`. + +### 1.4.2. Pre-built binaries + +Duplo is also available as a pre-built binary for (alpine) linux and macos. Grab the executable from the [releases](https://github.com/dlidstrom/Duplo/releases) page. + +You can of course build from source as well, and you'll have to do so to get a binary for Windows. + +## 1.5. Usage + +Duplo works with a list of files. You can either specify a file that contains the list of files, or you can pass them using `stdin`. + +Run `duplo --help` on the command line to see the detailed options. + +### 1.5.1. Passing files using `stdin` + +```bash +# unix +> find . 
\( -iname "*.cpp" -o -iname "*.h" \) | duplo - out.txt + +# windows +> Get-ChildItem -Include "*.cpp", "*.h" -Recurse | % { $_.FullName } | Duplo.exe - out.txt +``` + +`duplo` will write the duplicated blocks into `out.txt`. + +### 1.5.2. Passing files using file + +`duplo` can analyze files specified in a separate file: + +```bash +# unix +> find . -type f \( -iname "*.cpp" -o -iname "*.h" \) > files.lst +> duplo files.lst out.txt + +# windows +> Get-ChildItem -Include "*.cpp", "*.h" -Recurse | % { $_.FullName } | Out-File -encoding ascii files.lst +> Duplo.exe files.lst out.txt +``` + +### 1.5.3. Xml output + +Duplo can also output xml and there is a stylesheet that will format the result for viewing in a browser. This can be used as a report tab in your continuous integration tool (TeamCity, etc). + +## 1.6. Feedback and Bug Reporting + +Please open an issue to discuss feedback, +feature requests and bug reports. + +## 1.7. Algorithm Background + +Duplo uses the same techniques as Duploc to detect duplicated code blocks. See +[Duca99bCodeDuplication](http://scg.unibe.ch/archive/papers/Duca99bCodeDuplication.pdf) for +further information. + +### 1.7.1. Performance Measurements + +| System | Files | Loc's | Time | +|-|-|-|-| +| Quake2 | 266 | 102740 | 18sec | + +## 1.8. Developing + +### 1.8.1. Unix + +You need `CMake` and preferably `fswatch` for the best experience. + +```bash +# build dependencies +/> brew install cmake +/> brew install fswatch +``` + +Compiling is best done using the continuous file watcher: + +```bash +# CMake builds in the build folder +/> mkdir build +/> pushd build +build/> cmake .. +# now issue make +build/> make +build/> popd +# continuous build can now be used in root folder +# (needs fswatch) +> ./watch.sh +``` + +### 1.8.2. Windows + +Use Visual Studio 2019 to open the included solution file (or try `CMake`). + +### 1.8.3. 
Additional Language Support + +Duplo can analyze all text files regardless of format, but it has special support for some programming languages (C++, C#, Java, for example). This allows Duplo to improve the duplication detection as it can ignore preprocessor directives and/or comments. + +To implement support for a new language, there are a couple of options (in order of complexity): + +1. Implement `FileTypeBase` which has support for handling comments and preprocessor directives. You just need to decide what is a comment. With this option you need to implement a couple of methods, one of which is `CreateLineFilter`. This is to remove multiline comments. Look at `CstyleCommentsFilter` for an example. +2. Implement `IFileType` interface directly. This gives you the most freedom but also is the hardest option of course. + +### 1.8.4. Language Suggestions + +- JavaScript (easy, just look at the existing C-based ones) +- Ruby +- Python +- Perl +- PHP +- Rust +- F# +- Scala +- Haskell +- Erlang +- What else? + +Send me a pull request! + +## 1.9. Changes + +- 0.4 + - Significant performance improvements + - Using modern C++ techniques + - Modularized to simplify adding support of new file formats + - Can pass files using `stdin` +- 0.3 + - Updated links in html output to GitHub + - Support for gcc assembly (.s) + - Fixed minimum number of lines in analysis + - Fixed limitation of total number of lines of code + - Checking of arbitrary files + +## 1.10. License + +Duplo is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +Duplo is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with Duplo; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +---