diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 01f5bc17..93a1003e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,8 +30,8 @@ jobs: - name: Check out pipeline code uses: actions/checkout@v3 - - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + - name: Set up Nextflow + uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 1fcafe88..1282c673 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -44,7 +44,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install nf-core + pip install nf-core==2.14.1 - name: Run nf-core lint env: diff --git a/.github/workflows/pytest_ci.yml b/.github/workflows/pytest_ci.yml new file mode 100644 index 00000000..98008834 --- /dev/null +++ b/.github/workflows/pytest_ci.yml @@ -0,0 +1,32 @@ +name: Python Tests + +on: + push: + branches: [main] + pull_request: + branches: [main, dev] + +jobs: + test: + runs-on: ubuntu-latest + + strategy: + matrix: + python-version: [3.9] + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + pip install -r requirements-dev.txt + + - name: Run tests + run: | + pytest diff --git a/.gitignore b/.gitignore index 175fdca1..d3523a85 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,49 @@ +# Ignore Nextflow-specific files, including logs, cache, and temporary files .nextflow* + +# Ignore Nextflow's work directory where intermediate files are stored work/ -data/ + +# Ignore results directory, as this is typically generated output that can be large and recreated results/ + +# macOS-specific hidden file that stores folder view settings .DS_Store + testing/ testing* -*.pyc + dbs/ + +node_modules/ + +# Optional: ignore any temporary files created by Python or text editors +# Python cache directory +__pycache__/ +*.pyc +# Python optimized bytecode files +*.pyo +# Pickle files (often generated during data processing) +*.pkl + +# Ignore any virtual environment directories used to isolate Python dependencies +venv/ +env/ +*.venv/ + +# Ignore Jupyter Notebook checkpoints, if notebooks are used for analysis or reporting +.ipynb_checkpoints/ + +# Ignore any temporary, swap, or backup files created by editors like Vim or Emacs +*~ +*.swp +*.swo +*.bak + +.coverage +htmlcov/ +*.cover +reports/ +trace/ +.cache/ +logs/ diff --git a/.nf-core.yml b/.nf-core.yml index 1f060a16..58700dad 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,4 +1,8 @@ repository_type: pipeline +org_path: ebi-metagenomics +template: + prefix: ebi-metagenomics + - github_badges lint: files_exist: - CODE_OF_CONDUCT.md @@ -26,10 +30,12 @@ lint: - docs/images/nf-core-mettannotator_logo_light.png - docs/images/nf-core-mettannotator_logo_dark.png - .github/ISSUE_TEMPLATE/bug_report.yml - nextflow_config: False - - params.custom_config_version - - params.custom_config_base - - manifest.name - - manifest.homePage + - .github/workflows/linting.yml + - .github/CONTRIBUTING.md + - .github/workflows/branch.yml + - .github/workflows/linting_comment.yml + - .github/PULL_REQUEST_TEMPLATE.md + - .gitignore + nextflow_config: false multiqc_config: - report_comment diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0c31cdb9..890027e0 100644 ---
a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,20 @@ repos: - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v2.7.1" + rev: "v4.0.0-alpha.8" hooks: - id: prettier + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + - repo: https://github.com/psf/black + rev: 24.8.0 + hooks: + - id: black + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.6.8 + hooks: + - id: ruff + args: [--fix] diff --git a/.ruff.toml b/.ruff.toml new file mode 100644 index 00000000..87ea8418 --- /dev/null +++ b/.ruff.toml @@ -0,0 +1,12 @@ +line-length = 120 +target-version = "py38" +cache-dir = "~/.cache/ruff" + +[lint] +select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] + +[lint.isort] +known-first-party = ["nf_core"] + +[lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] diff --git a/CITATIONS.md b/CITATIONS.md index a115ddce..e4c55605 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -62,6 +62,10 @@ > Sanchez S, Rogers JD, Rogers AB, Nassar M, McEntyre J, Welch M, Hollfelder F, Finn RD. Expansion of novel biosynthetic gene clusters from diverse environments using SanntiS. bioRxiv 2023.05.23.540769; doi: https://doi.org/10.1101/2023.05.23.540769 +- [Pseudofinder](https://doi.org/10.1093/molbev/msac153) + + > Syberg-Olsen MJ, Garber AI, Keeling PJ, McCutcheon JP, Husnik F. Pseudofinder: Detection of Pseudogenes in Prokaryotic Genomes. Mol Biol Evol. 2022 Jul 2;39(7):msac153. doi: 10.1093/molbev/msac153. PMID: 35801562; PMCID: PMC9336565. + - [run_dbCAN](https://pubmed.ncbi.nlm.nih.gov/37125649/) > Zheng J, Ge Q, Yan Y, Zhang X, Huang L, Yin Y. dbCAN3: automated carbohydrate-active enzyme and substrate annotation. Nucleic Acids Res. 2023 Jul 5;51(W1):W115-W121. doi: 10.1093/nar/gkad328. PMID: 37125649; PMCID: PMC10320055. diff --git a/README.md b/README.md index dce9f002..da773c3c 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ - [ Usage ](#usage) - [ Test ](#test) - [ Outputs ](#out) +- [ Preparing annotations for ENA or GenBank submission ](#submission) - [ Mobilome annotation ](#mobilome) - [ Credits ](#credit) - [ Contributions and Support ](#contribute) @@ -39,6 +40,8 @@ The workflow uses the following tools and databases: | [Prokka](https://github.com/tseemann/prokka) | 1.14.6 | CDS calling and functional annotation (default) | | [Bakta](https://github.com/oschwengers/bakta) | 1.9.3 | CDS calling and functional annotation (if --bakta flag is used) | | [Bakta db](https://zenodo.org/record/10522951/) | 2024-01-19 with AMRFinderPlus DB 2024-01-31.1 | Bakta DB (when Bakta is used as the gene caller) | +| [Pseudofinder](https://github.com/filip-husnik/pseudofinder) | v1.1.0 | Identification of possible pseudogenes | +| [Swiss-Prot](https://www.uniprot.org/help/downloads) | 2024_06 | Database for Pseudofinder | | [InterProScan](https://www.ebi.ac.uk/interpro/about/interproscan/) | 5.62-94.0 | Protein annotation (InterPro, Pfam) | | [eggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper) | 2.1.11 | Protein annotation (eggNOG, KEGG, COG, GO-terms) | | [eggNOG DB](http://eggnog6.embl.de/download/) | 5.0.2 | Database for eggNOG-mapper | @@ -89,7 +92,8 @@ The pipeline needs reference databases in order to work, they take roughly 180G.
| interproscan | 45G | | interpro_entry_list | 2.6M | | rfam_models | 637M | -| total | 180G | +| pseudofinder | 273M | +| total | 182G | `mettannotator` has an automated mechanism to download the databases using the `--dbs ` flag. When this flag is provided, the pipeline inspects the folder to verify if the required databases are already present. If any of the databases are missing, the pipeline will automatically download them. @@ -177,8 +181,12 @@ Note, that by default the script uses FASTA file names as prefixes and truncates Running `mettannotator` with the `--help` option will pull the repository and display the help message: +> [!NOTE] +> We use the `-latest` flag with the `nextflow run` command, which ensures that the latest available version of the pipeline is pulled. +> If you encounter any issues with the `nextflow run` command, please refer to the [Nextflow documentation](https://www.nextflow.io/docs/latest/reference/cli.html#run). + ```angular2html -nextflow run ebi-metagenomics/mettannotator/main.nf --help +$ nextflow run -latest ebi-metagenomics/mettannotator/main.nf --help N E X T F L O W ~ version 23.04.3 Launching `mettannotator/main.nf` [disturbed_davinci] DSL2 - revision: f2a0e51af6 @@ -229,6 +237,8 @@ Reference databases database for version 4.0 on this ftp location: ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipelines/tool-dbs/dbcan/dbcan_4.0.tar.gz --dbcan_db_version [string] The dbCAN reference database version. [default: 4.1.3_V12] + --pseudofinder_db [string] Pseudofinder reference database. Mettannotator uses SwissProt as the database for Pseudofinder. + --pseudofinder_db_version [string] SwissProt version. [default: 2024_06] Generic options --multiqc_methods_description [string] Custom MultiQC yaml file containing HTML including a methods description. @@ -259,17 +269,25 @@ nextflow run ebi-metagenomics/mettannotator \ --dbs ``` -> **Warning:** +> [!WARNING] > Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those > provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; > see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). +#### Running the pipeline from the source code + +If the Nextflow integration with Git does not work, users can download the tarball from the releases page. After extracting the tarball, the pipeline can be run directly by executing the following command: + +```bash +$ nextflow run path-to-source-code/main.nf --help +``` + #### Local execution The pipeline can be run on a desktop or laptop, with the caveat that it will take a few hours to complete depending on the resources. There is a local profile in the Nextflow config that limits the total resources the pipeline can use to 8 cores and 12 GB of RAM. 
In order to run it (Docker or Singularity are still required): ```bash -nextflow run ebi-metagenomics/mettannotator \ +nextflow run -latest ebi-metagenomics/mettannotator \ -profile local, \ --input assemblies_sheet.csv \ --outdir \ @@ -302,7 +320,7 @@ To run the pipeline using a test dataset, execute the following command: ```bash wget https://raw.githubusercontent.com/EBI-Metagenomics/mettannotator/master/tests/test.csv -nextflow run ebi-metagenomics/mettannotator \ +nextflow run -latest ebi-metagenomics/mettannotator \ -profile \ --input test.csv \ --outdir \ @@ -331,6 +349,7 @@ The output folder structure will look as follows: │ ├─interproscan │ ├─merged_gff │ ├─prokka + │ ├─pseudofinder │ └─unifire ├─mobilome │ └─crisprcas_finder @@ -429,12 +448,49 @@ The following logic is used by `mettannotator` to fill out the `product` field i If the pipeline is executed with the `--fast` flag, only the output of eggNOG-mapper is used to determine the product of proteins that were labeled as hypothetical by the gene caller. +#### Detection of pseudogenes and spurious ORFs + +`mettannotator` uses several approaches to detect pseudogenes and spurious ORFs: + +- If Bakta is used as the initial annotation tool, `mettannotator` will inherit the pseudogene labels assigned by Bakta. +- `mettannotator` runs Pseudofinder and labels genes that Pseudofinder predicts to be pseudogenes by adding `"pseudo=true"` to the 9th column of the final merged GFF file. If Pseudofinder and Bakta disagree, the gene is labeled as a pseudogene as long as either tool calls it one. +- AntiFam, which is part of InterPro, is used to identify potential spurious ORFs. If an ORF has an AntiFam hit, `mettannotator` will remove it from the final merged GFF file. These ORFs will still appear in the raw outputs of Bakta/Prokka and may appear in other tool outputs. + +`mettannotator` produces a report file, located in the `merged_gff` folder, that lists CDS with AntiFam hits and pseudogenes. For each pseudogene, the report shows which tool predicted it. + ### Contents of the tool output folders The output folders of each individual tool contain select output files of the third-party tools used by `mettannotator`. For file descriptions, please refer to the tool documentation. For some tools that don't output a GFF, `mettannotator` converts the output into a GFF. Note: if the pipeline completed without errors but some of the tool-specific output folders are empty, those particular tools did not generate any annotations to output. + +<a name="submission"></a> + +## Preparing annotations for ENA or GenBank submission + +`mettannotator` produces a final annotation file in GFF3 format. To submit the annotations to data archives, it is first necessary to convert the GFF3 file into the required format using available third-party tools. `mettannotator` outputs a specially formatted GFF3 file, named `_submission.gff`, to be used with the converters. + +### ENA + +ENA accepts annotations in the EMBL flat-file format. +Please use [EMBLmyGFF3](https://github.com/NBISweden/EMBLmyGFF3) to perform the conversion; the repository includes detailed instructions. The two files required for conversion are: + +- the genome FASTA file +- `//functional_annotation/merged_gff/_submission.gff` + +Please note that it is necessary to register the project and locus tags in ENA prior to conversion. Follow the links in the [EMBLmyGFF3](https://github.com/NBISweden/EMBLmyGFF3) repository for more details; a minimal example invocation is sketched below.
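+ +For reference, a minimal `EMBLmyGFF3` command could look like the sketch below. This is an illustrative example rather than part of the pipeline: `my_assembly_submission.gff` and `my_assembly.fasta` stand in for your own files, and the species, locus tag, and project ID values are placeholders that must match what was registered in ENA (see `EMBLmyGFF3 --help` for the full list of options). + +```bash +EMBLmyGFF3 my_assembly_submission.gff my_assembly.fasta \ + --topology linear \ + --molecule_type "genomic DNA" \ + --transl_table 11 \ + --species "Escherichia coli" \ + --locus_tag MYLOCUSTAG \ + --project_id PRJEB00000 \ + -o my_assembly.embl +```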
+ +### GenBank + +To convert annotations for GenBank submission, please use [table2asn](https://www.ncbi.nlm.nih.gov/genbank/table2asn/). +Three files are required: + +- the genome FASTA file +- `//functional_annotation/merged_gff/_submission.gff` +- Submission template file (can be generated [here](https://submit.ncbi.nlm.nih.gov/genbank/template/submission/)) + +More instructions on running `table2asn` are available via [GenBank](https://www.ncbi.nlm.nih.gov/genbank/genomes_gff/). + ## Mobilome annotation diff --git a/bin/__init__.py b/bin/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bin/add_hypothetical_protein_descriptions.py b/bin/add_hypothetical_protein_descriptions.py index f4d9157f..ea93ef34 100755 --- a/bin/add_hypothetical_protein_descriptions.py +++ b/bin/add_hypothetical_protein_descriptions.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # Copyright 2024 EMBL - European Bioinformatics Institute # @@ -19,6 +18,7 @@ import logging import re import sys +from enum import Enum logging.basicConfig(level=logging.INFO) @@ -28,29 +28,41 @@ MINIMUM_IPR_MATCH = 0.10 +class GeneCaller(Enum): + PROKKA = "Prokka" + BAKTA = "Bakta" + + def main(ipr_types_file, ipr_file, hierarchy_file, eggnog_file, infile, outfile): eggnog_info = load_eggnog(eggnog_file) if ipr_file: levels = load_hierarchy(hierarchy_file) ipr_types = load_ipr_types(ipr_types_file) - ipr_info, ipr_memberdb_only, ipr_leveled_info = load_ipr( - ipr_file, ipr_types, levels - ) + ipr_info, ipr_memberdb_only, _ = load_ipr(ipr_file, ipr_types, levels) else: ipr_info = dict() ipr_memberdb_only = dict() - gene_caller = "Prokka" + + gene_caller = GeneCaller.PROKKA fasta_flag = False - with open(infile, "r") as file_in, open(outfile, "w") as file_out: + with open(infile) as file_in, open(outfile, "w") as file_out: for line in file_in: if not fasta_flag: if line.startswith("##FASTA"): fasta_flag = True file_out.write(line) elif not line.startswith("#"): - contig, tool, feature, start, end, blank1, strand, blank2, col9 = ( - line.strip().split("\t") - ) + ( + contig, + tool, + feature, + start, + end, + blank1, + strand, + blank2, + col9, + ) = line.strip().split("\t") if feature == "CDS": attributes_dict = dict( re.split(r"(? 
best_level or ( @@ -524,13 +549,13 @@ def pull_out_description(my_dict, first_priority, second_priority): def load_eggnog(file): eggnog_info = dict() - with open(file, "r") as file_in: + with open(file) as file_in: for line in file_in: if not line.startswith("#"): cols = line.strip().split("\t") try: evalue = float(cols[2]) - except: + except: # noqa: E722 continue if evalue > EVALUE_CUTOFF: continue @@ -751,7 +776,7 @@ def clean_up_eggnog_function(func_description): def load_ipr_types(ipr_types_file): ipr_types = dict() - with open(ipr_types_file, "r") as file_in: + with open(ipr_types_file) as file_in: for line in file_in: if line.startswith("IPR"): acc, type, _ = line.strip().split("\t") @@ -764,7 +789,7 @@ def load_ipr(file, ipr_types, ipr_levels): ipr_leveled_info = dict() ipr_memberdb_only = dict() # hit only exists in a member database - with open(file, "r") as file_in: + with open(file) as file_in: for line in file_in: cols = line.strip().split("\t") ( @@ -900,7 +925,7 @@ def escape_reserved_characters(string): if ch == ";": string = string.replace(ch, "/") else: - string = string.replace(ch, "\{}".format(ch)) + string = string.replace(ch, f"\{ch}") return string diff --git a/bin/add_interpro_descriptions.py b/bin/add_interpro_descriptions.py index 5fa601d7..7ab433f0 100755 --- a/bin/add_interpro_descriptions.py +++ b/bin/add_interpro_descriptions.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # Copyright 2024 EMBL - European Bioinformatics Institute # @@ -21,7 +20,7 @@ def main(ipr_types_file, infile, outfile): ipr_types_and_descriptions = load_ipr(ipr_types_file) - with open(infile, "r") as file_in, open(outfile, "w") as file_out: + with open(infile) as file_in, open(outfile, "w") as file_out: fasta_flag = False for line in file_in: if not fasta_flag: @@ -108,7 +107,7 @@ def load_ipr(ipr_types_file): "PTM": "S", "Repeat": "R", } - with open(ipr_types_file, "r") as file_in: + with open(ipr_types_file) as file_in: for line in file_in: if line.startswith("IPR"): acc, ipr_type, desc = line.strip().split("\t") diff --git a/bin/add_locus_tag_to_trna.py b/bin/add_locus_tag_to_trna.py new file mode 100755 index 00000000..d1889bc0 --- /dev/null +++ b/bin/add_locus_tag_to_trna.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 + +# Copyright 2024 EMBL - European Bioinformatics Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import re + + +def main(input_file, output_file): + with open(input_file) as file_in, open(output_file, "w") as file_out: + for line in file_in: + if line.startswith("#") or not line.strip(): + # Write header or empty lines as is + file_out.write(line) + continue + + columns = line.strip().split("\t") + if len(columns) < 9: + file_out.write(line) + continue + + if columns[2] != "tRNA": + file_out.write(line) + continue + + # Parse the attributes (column 9) + attributes = columns[8].rstrip(";") + attributes_dict = dict( + re.split(r"(? 
0: + logging.info("The following genes were only identified in the standard GFF:") + for gene in unique_to_standard: + logging.info(gene) + if len(unique_to_compliant) > 0: + logging.info("The following genes were only identified in the compliant GFF:") + for gene in unique_to_compliant: + logging.info(gene) + return name_translation + + +def load_gff(gff_file, chromosome_dictionary): + genes = dict() + with open(gff_file) as file_in: + for line in file_in: + if line.startswith("##FASTA"): + return genes + elif line.startswith("#"): + continue + else: + chromosome, _, feature, start, end, _, strand, _, col9 = ( + line.strip().split("\t") + ) + if "|" in chromosome: + chromosome = chromosome.split("|")[-1] + if chromosome in chromosome_dictionary: + chromosome = chromosome_dictionary[chromosome] + if feature == "CDS": + attributes_dict = dict( + re.split(r"(? 1e-10: continue + if cols[3] == "AntiFam": + antifams.append(protein) + continue if protein not in iprs: iprs[protein] = [set(), set()] if cols[3] == "Pfam": @@ -128,12 +159,12 @@ def get_iprs(ipr_annot): ipr = cols[11] if not ipr == "-": iprs[protein][1].add(ipr) - return iprs + return iprs, antifams def get_eggnog(eggnog_annot): eggnogs = {} - with open(eggnog_annot, "r") as f: + with open(eggnog_annot) as f: for line in f: line = line.rstrip() cols = line.split("\t") @@ -183,7 +214,7 @@ def get_bgcs(bgc_file, prokka_gff, tool): return bgc_annotations # save positions of each BGC cluster to dictionary cluster_positions # and save the annotations to dictionary bgc_result - with open(bgc_file, "r") as bgc_in: + with open(bgc_file) as bgc_in: for line in bgc_in: if not line.startswith("#"): ( @@ -245,7 +276,7 @@ def get_bgcs(bgc_file, prokka_gff, tool): {"bgc_function": type_value}, ) # identify CDSs that fall into each of the clusters annotated by the BGC tool - with open(prokka_gff, "r") as gff_in: + with open(prokka_gff) as gff_in: for line in gff_in: if not line.startswith("#"): matching_interval = "" @@ -309,7 +340,7 @@ def get_amr(amr_file): amr_annotations = {} if not amr_file: return amr_annotations - with open(amr_file, "r") as f: + with open(amr_file) as f: for line in f: if line.startswith("Protein identifier"): continue @@ -339,13 +370,13 @@ def get_amr(amr_file): seq_name = seq_name.replace("=", " ") amr_annotations[protein_id] = ";".join( [ - "amrfinderplus_gene_symbol={}".format(gene_name), - "amrfinderplus_sequence_name={}".format(seq_name), - "amrfinderplus_scope={}".format(scope), - "element_type={}".format(element_type), - "element_subtype={}".format(element_subtype), - "drug_class={}".format(drug_class), - "drug_subclass={}".format(drug_subclass), + f"amrfinderplus_gene_symbol={gene_name}", + f"amrfinderplus_sequence_name={seq_name}", + f"amrfinderplus_scope={scope}", + f"element_type={element_type}", + f"element_subtype={element_subtype}", + f"drug_class={drug_class}", + f"drug_subclass={drug_subclass}", ] ) return amr_annotations @@ -356,7 +387,7 @@ def get_dbcan(dbcan_file): substrates = dict() if not dbcan_file: return dbcan_annotations - with open(dbcan_file, "r") as f: + with open(dbcan_file) as f: for line in f: if "predicted PUL" in line: annot_fields = line.strip().split("\t")[8].split(";") @@ -399,7 +430,7 @@ def get_defense_finder(df_file): type_info = dict() if not df_file: return defense_finder_annotations - with open(df_file, "r") as f: + with open(df_file) as f: for line in f: if "Anti-phage system" in line: annot_fields = line.strip().split("\t")[8].split(";") @@ -427,6 +458,29 @@ def 
get_defense_finder(df_file): return defense_finder_annotations +def get_pseudogenes(pseudofinder_file): + pseudogenes = dict() + if not pseudofinder_file: + return pseudogenes + with open(pseudofinder_file) as file_in: + for line in file_in: + if not line.startswith("#"): + col9 = line.strip().split("\t")[8] + attributes_dict = dict( + re.split(r"(? contig_num_limit: logging.info( - "Skipping plot generation for file {} due to a large number of contigs: {}. " - "Plots are only generated for genomes with up to {} annotated contigs.".format( - infile, len(seqid2size), contig_num_limit - ) + f"Skipping plot generation for file {infile} due to a large number of contigs: {len(seqid2size)}. " + f"Plots are only generated for genomes with up to {contig_num_limit} annotated contigs." ) sys.exit() @@ -30,7 +28,7 @@ def main(infile, outfile, prefix, contig_num_limit, contig_trim, mobilome, skip_ circos = Circos(seqid2size, space=1, start=1, end=358) - circos.text("{}\n".format(prefix), size=15, r=30) + circos.text(f"{prefix}\n", size=15, r=30) # Skip printing contig names if the names are too long print_contigs = True @@ -116,7 +114,7 @@ def main(infile, outfile, prefix, contig_num_limit, contig_trim, mobilome, skip_ if "defense_finder_type" in feature.qualifiers: antiphage_track.genomic_features([feature], fc="orchid") - elif feature.type in ["tRNA", "ncRNA"]: + elif feature.type in ["tRNA", "ncRNA", "rRNA"]: rna_track.genomic_features([feature], fc="darkmagenta") elif mobilome and feature.type in [ "mobility_island", @@ -164,7 +162,7 @@ def main(infile, outfile, prefix, contig_num_limit, contig_trim, mobilome, skip_ def remove_escaped_characters(infile): outfile = infile + "_modified" - with open(infile, "r") as file_in: + with open(infile) as file_in: content = file_in.read() modified_content = content.replace("\\=", "") diff --git a/bin/convert_cds_into_multiple_lines.py b/bin/convert_cds_into_multiple_lines.py index e4a8ad80..2a91a902 100755 --- a/bin/convert_cds_into_multiple_lines.py +++ b/bin/convert_cds_into_multiple_lines.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright 2023 EMBL - European Bioinformatics Institute +# Copyright 2023-2024 EMBL - European Bioinformatics Institute # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -79,7 +78,7 @@ def edit_id_add_parent(attributes_dict, id, parent=None): # Read the input GFF file and process the entries output_entries = [] - with open(input_file, "r") as infile: + with open(input_file) as infile: for line in infile: entry = line.strip().split("\t") new_entries = split_cds_to_gene_exon_mrna(entry) diff --git a/bin/identify_kingdom.py b/bin/identify_kingdom.py index df790318..462bac11 100755 --- a/bin/identify_kingdom.py +++ b/bin/identify_kingdom.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # Copyright 2023-2024 EMBL - European Bioinformatics Institute # @@ -29,9 +28,9 @@ def main(taxid, include_kingdom, outfile): kingdom = "Bacteria" if not taxid.isdigit(): - sys.exit("Invalid format of taxid {}".format(taxid)) + sys.exit(f"Invalid format of taxid {taxid}") try: - url = "https://www.ebi.ac.uk/ena/taxonomy/rest/tax-id/{}".format(taxid) + url = f"https://www.ebi.ac.uk/ena/taxonomy/rest/tax-id/{taxid}" r = run_request(url) res = r.json() lineage = res.get("lineage", "") @@ -46,20 +45,18 @@ def main(taxid, include_kingdom, outfile): pass else: logging.error( - "Unknown lineage {}. 
Reporting default kingdom instead: Bacteria.".format( - lineage - ) + f"Unknown lineage {lineage}. Reporting default kingdom instead: Bacteria." ) - except: + except: # noqa: E722 logging.error( - "Unable to identify lineage for taxid {}. " - "Reporting default kingdom instead: Bacteria.".format(taxid) + f"Unable to identify lineage for taxid {taxid}. " + "Reporting default kingdom instead: Bacteria." ) - logging.info("Reporting kingdom {} for taxid {}".format(kingdom, taxid)) + logging.info(f"Reporting kingdom {kingdom} for taxid {taxid}") if outfile: if include_kingdom: outfile_path, outfile_name = os.path.split(outfile) - outfile_new_name = "{}_{}".format(kingdom, outfile_name) + outfile_new_name = f"{kingdom}_{outfile_name}" outfile = os.path.join(outfile_path, outfile_new_name) with open(outfile, "w") as file_out: file_out.write(kingdom) @@ -90,7 +87,7 @@ def parse_args(): ) parser.add_argument( "--include-kingdom", - action='store_true', + action="store_true", help="Add the kingdom name at the beginning of the file name.", ) parser.add_argument( diff --git a/bin/make_combined_unifire_output.py b/bin/make_combined_unifire_output.py index f03abc4c..13769859 100755 --- a/bin/make_combined_unifire_output.py +++ b/bin/make_combined_unifire_output.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright 2023 EMBL - European Bioinformatics Institute +# Copyright 2023-2024 EMBL - European Bioinformatics Institute # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -43,15 +42,13 @@ def main(indir, outfile): def load_file(dbname, indir, filename, result): if not os.path.exists(os.path.join(indir, filename)): - logging.error( - "File {} does not exist in folder {}. Aborting.".format(filename, indir) - ) + logging.error(f"File {filename} does not exist in folder {indir}. Aborting.") sys.exit() - with open(os.path.join(indir, filename), "r") as file_in: + with open(os.path.join(indir, filename)) as file_in: for line in file_in: if not line.startswith("Evidence"): prot = line.split("\t")[1] - new_line = "{}\t{}".format(dbname, line) + new_line = f"{dbname}\t{line}" result.setdefault(prot, list()).append(new_line) return result diff --git a/bin/prepare_gff_for_conversion.py b/bin/prepare_gff_for_conversion.py new file mode 100755 index 00000000..588023c6 --- /dev/null +++ b/bin/prepare_gff_for_conversion.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python3 + +# Copyright 2023-2024 EMBL - European Bioinformatics Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# + +import argparse +import re + + +def main(infile, outfile): + with open(infile) as file_in, open(outfile, "w") as file_out: + fasta_flag = False + for line in file_in: + if line.startswith("##FASTA"): + fasta_flag = True + file_out.write(line) + elif line.startswith("#"): + file_out.write(line) + elif fasta_flag: + file_out.write(line) + else: + new_line = modify_line(line) + file_out.write(new_line + "\n") + + +def modify_line(line): + columns = line.strip().split("\t") + if columns[2] in ["region", "tRNA"]: + return line.strip() + + # Parse the 9th field into key-value pairs + attributes_dict = dict( + re.split(r"(? 0: + text_to_add = ",".join(formatted_list_to_move) + if "Dbxref" in attributes_dict: + attributes_dict["Dbxref"] = attributes_dict["Dbxref"] + "," + text_to_add + else: + attributes_dict["Dbxref"] = text_to_add + attributes_dict["Dbxref"] = attributes_dict["Dbxref"].rstrip(",").lstrip(",") + return attributes_dict + + +def collect_go_terms(attributes_dict): + # Function collects GO terms from across the 9th column into one list and deduplicates them + go_terms = list() + from_fields = ["Dbxref", "Ontology_term", "uf_ontology_term"] + for field in from_fields: + if field in attributes_dict: + annotations = attributes_dict[field].split(",") + for annotation in annotations[ + : + ]: # Using a copy of the list to avoid iteration issues + if annotation.startswith("GO:"): + go_terms.append(annotation) + annotations.remove(annotation) + if len(annotations) > 0: + modified_annotations = ",".join(annotations) + attributes_dict[field] = modified_annotations + else: + del attributes_dict[field] + # deduplicate + go_terms = list(set(go_terms)) + if len(go_terms) > 0: + if "Dbxref" in attributes_dict: + attributes_dict["Dbxref"] = ( + attributes_dict["Dbxref"] + "," + ",".join(go_terms) + ) + else: + attributes_dict["Dbxref"] = ",".join(go_terms) + attributes_dict["Dbxref"] = attributes_dict["Dbxref"].rstrip(",") + return attributes_dict + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Script processes final GFF to prepare for conversion " + "for ENA/GenBank." + ) + parser.add_argument( + "-i", "--infile", required=True, help="Path to the GFF file to process" + ) + parser.add_argument( + "-o", "--outfile", required=True, help="Path to the output file" + ) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + main( + args.infile, + args.outfile, + ) diff --git a/bin/prepare_unifire_input.py b/bin/prepare_unifire_input.py index 56b56c5d..470d992f 100755 --- a/bin/prepare_unifire_input.py +++ b/bin/prepare_unifire_input.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright 2023 EMBL - European Bioinformatics Institute +# Copyright 2023-2024 EMBL - European Bioinformatics Institute # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -27,13 +26,11 @@ def main(infile, taxid, outdir): check_dir(outdir) if not taxid.isdigit(): sys.exit( - "Taxid must consist of digits only. Taxid {} is not valid. Exiting.".format( - taxid - ) + f"Taxid must consist of digits only. Taxid {taxid} is not valid. Exiting."
) outfile = "proteins.fasta" outpath = os.path.join(outdir, outfile) - with open(outpath, "w") as file_out, open(infile, "r") as file_in: + with open(outpath, "w") as file_out, open(infile) as file_in: for line in file_in: if line.startswith(">"): formatted_line = reformat_line(line, taxid) @@ -53,10 +50,10 @@ def check_dir(directory_path): def reformat_line(line, taxid): line = line.lstrip(">").strip() id, description = line.split(maxsplit=1) - description = description.replace("\"", "").replace("'", "").replace("‘", "").replace("’", "") - formatted_line = ">tr|{id}|{description} OX={taxid}\n".format( - id=id, description=description, taxid=taxid + description = ( + description.replace('"', "").replace("'", "").replace("‘", "").replace("’", "") ) + formatted_line = f">tr|{id}|{description} OX={taxid}\n" return formatted_line diff --git a/bin/process_amrfinderplus_results.py b/bin/process_amrfinderplus_results.py index e3a77dbd..2774eda4 100755 --- a/bin/process_amrfinderplus_results.py +++ b/bin/process_amrfinderplus_results.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright 2023 EMBL - European Bioinformatics Institute +# Copyright 2023-2024 EMBL - European Bioinformatics Institute # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -23,7 +22,7 @@ def main(amr_file, outfile, version): - with open(amr_file, "r") as file_in, open(outfile, "w") as file_out: + with open(amr_file) as file_in, open(outfile, "w") as file_out: writer = csv.writer(file_out, delimiter="\t") writer.writerow(["##gff-version 3"]) for line in file_in: diff --git a/bin/process_crispr_results.py b/bin/process_crispr_results.py index 9485a582..34586135 100755 --- a/bin/process_crispr_results.py +++ b/bin/process_crispr_results.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright 2023 EMBL - European Bioinformatics Institute +# Copyright 2023-2024 EMBL - European Bioinformatics Institute # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -42,7 +41,7 @@ def create_gff( for gff in gffs: filename_base = gff.split("/")[-1].split(".")[0] if filename_base in hits: - with open(gff, "r") as gff_in: + with open(gff) as gff_in: for line in gff_in: if line.startswith("#") or len(line.strip()) == 0: continue @@ -83,7 +82,7 @@ def fix_capitalisation(line): def remove_leader_attribute(line): - """ GFFs produced by CRISPRCasFinder have a "leader" attribute without any value making the GFF invalid""" + """GFFs produced by CRISPRCasFinder have a "leader" attribute without any value making the GFF invalid""" if ";leader;" in line: line = line.replace("leader;", "") return line @@ -107,8 +106,8 @@ def fix_crispr_id(line): elif a.startswith("ID="): id = a.split("=")[1] # swap the values of "name" and "id" - fixed_annot = re.sub("ID={}".format(id), "ID={}".format(name), annot) - fixed_annot = re.sub("Name={}".format(name), "Name={}".format(id), fixed_annot) + fixed_annot = re.sub(f"ID={id}", f"ID={name}", annot) + fixed_annot = re.sub(f"Name={name}", f"Name={id}", fixed_annot) fields[8] = fixed_annot return "\t".join(fields) + "\n" @@ -116,7 +115,7 @@ def fix_crispr_id(line): def add_evidence_level(line, evidence_levels): crispr_id = get_crispr_id(line) try: - line = "{}evidence_level={}\n".format(line.strip(), evidence_levels[crispr_id]) + line = f"{line.strip()}evidence_level={evidence_levels[crispr_id]}\n" except Exception: logging.error(f"Cannot get evidence level for CRISPR {crispr_id}") return line @@ -183,17 +182,15 @@ def fix_gff_line(line, fasta): def fix_annotation(feature_seq, at_percentage, annotation): - annotation = annotation.replace("at%=0;", "at%={};".format(at_percentage)) - annotation = annotation.replace( - "sequence=UNKNOWN", "sequence={}".format(feature_seq) - ) + annotation = annotation.replace("at%=0;", f"at%={at_percentage};") + annotation = annotation.replace("sequence=UNKNOWN", f"sequence={feature_seq}") return annotation def calc_at_percentage(feature_seq): a = feature_seq.upper().count("A") t = feature_seq.upper().count("T") - return str(int((round((a + t) * 100 / len(feature_seq), 0)))) + return str(int(round((a + t) * 100 / len(feature_seq), 0))) def check_end_position(contig, end, seq_records): @@ -211,7 +208,7 @@ def process_tsv(tsv_report, tsv_output): hq_hits = list() evidence_levels = dict() with open(tsv_output, "w") as tsv_out: - with open(tsv_report, "r") as tsv_in: + with open(tsv_report) as tsv_in: for line in tsv_in: # ignore empty lines if len(line.strip()) == 0: @@ -225,7 +222,7 @@ def process_tsv(tsv_report, tsv_output): # add sequence basename to hits hits.append(parts[2]) # make crispr_id that the GFFs use - crispr_id = "{}_{}_{}".format(parts[1], parts[5], parts[6]) + crispr_id = f"{parts[1]}_{parts[5]}_{parts[6]}" # check if evidence level is high (2, 3 or 4) if parts[-1] in ["2", "3", "4"]: # add CRISPR ID (contig_start_end) diff --git a/bin/process_dbcan_result.py b/bin/process_dbcan_result.py index b7ab41a8..85cae776 100755 --- a/bin/process_dbcan_result.py +++ b/bin/process_dbcan_result.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # Copyright 2023-2024 EMBL - European Bioinformatics Institute # @@ -37,7 +36,7 @@ def load_cgcs(input_folder): for line in file_in: if not line.startswith("CGC#"): cgc, _, contig, _, start, end, _, _ = line.strip().split("\t") - cgc_id = "{}_{}".format(contig, cgc) + cgc_id = f"{contig}_{cgc}" if cgc_id in cgc_locations: if cgc_locations[cgc_id]["start"] > int(start): cgc_locations[cgc_id]["start"] = int(start) @@ -62,9 +61,9 @@ def 
print_gff(input_folder, outfile, dbcan_version, substrates, cgc_locations): cgc, gene_type, contig, prot_id, start, end, strand, protein_fam = ( line.strip().split("\t") ) - cgc_id = "{}_{}".format(contig, cgc) + cgc_id = f"{contig}_{cgc}" protein_fam = protein_fam.replace(" ", "") - if not cgc_id in cgcs_printed: + if cgc_id not in cgcs_printed: substrate = ( substrates[cgc_id] if cgc_id in substrates @@ -82,23 +81,13 @@ def print_gff(input_folder, outfile, dbcan_version, substrates, cgc_locations): ) cgcs_printed.append(cgc_id) file_out.write( - "{}\tdbCAN:{}\t{}\t{}\t{}\t.\t{}\t.\tID={};Parent={};protein_family={}\n".format( - contig, - dbcan_version, - gene_type, - start, - end, - strand, - prot_id, - cgc_id, - protein_fam, - ) + f"{contig}\tdbCAN:{dbcan_version}\t{gene_type}\t{start}\t{end}\t.\t{strand}\t.\tID={prot_id};Parent={cgc_id};protein_family={protein_fam}\n" ) def load_substrates(input_folder): substrates = dict() - with open(os.path.join(input_folder, "substrate.out"), "r") as file_in: + with open(os.path.join(input_folder, "substrate.out")) as file_in: for line in file_in: if not line.startswith("#"): parts = line.strip().split("\t") @@ -117,9 +106,7 @@ def load_substrates(input_folder): if not substrate_ecami: substrate_ecami = "N/A" substrates[cgc] = ( - "substrate_dbcan-pul={};substrate_dbcan-sub={}".format( - substrate_pul, substrate_ecami - ) + f"substrate_dbcan-pul={substrate_pul};substrate_dbcan-sub={substrate_ecami}" ) print(substrates) return substrates @@ -129,7 +116,7 @@ def check_folder_completeness(input_folder): status = True for file in ["cgc_standard.out", "overview.txt", "substrate.out"]: if not os.path.exists(os.path.join(input_folder, file)): - logging.error("File {} does not exist.".format(file)) + logging.error(f"File {file} does not exist.") status = False return status diff --git a/bin/process_defensefinder_result.py b/bin/process_defensefinder_result.py index 539bca4c..35ffb410 100755 --- a/bin/process_defensefinder_result.py +++ b/bin/process_defensefinder_result.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright 2023 EMBL - European Bioinformatics Institute +# Copyright 2023-2024 EMBL - European Bioinformatics Institute # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -35,7 +34,7 @@ def main(input_folder, prokka_gff, outfile, df_version): def load_prokka(prokka_gff): prokka_data = dict() - with open(prokka_gff, "r") as file_in: + with open(prokka_gff) as file_in: for line in file_in: if not line.startswith("#"): if line.startswith(">"): @@ -55,7 +54,7 @@ def load_prokka(prokka_gff): def print_systems_to_file(system_path, gene_results, outfile, df_version, prokka_data): - with open(system_path, "r") as file_in, open(outfile, "w") as file_out: + with open(system_path) as file_in, open(outfile, "w") as file_out: writer = csv.writer(file_out, delimiter="\t") writer.writerow(["##gff-version 3"]) for line in file_in: @@ -67,7 +66,7 @@ def print_systems_to_file(system_path, gene_results, outfile, df_version, prokka sys_beg_index, sys_end_index, protein_in_syst_index, - ) = [ + ) = ( line.strip().split().index(field) for field in [ "sys_id", @@ -77,7 +76,7 @@ def print_systems_to_file(system_path, gene_results, outfile, df_version, prokka "sys_end", "protein_in_syst", ] - ] + ) else: cols = line.strip().split("\t") start = prokka_data[cols[sys_beg_index]]["start"] @@ -120,14 +119,14 @@ def print_systems_to_file(system_path, gene_results, outfile, df_version, prokka def load_genes(gene_path): gene_results = dict() - with open(gene_path, "r") as file_in: + with open(gene_path) as file_in: for line in file_in: if line.lower().startswith("replicon"): # get indices of fields - acc_index, gene_index, hit_status = [ + acc_index, gene_index, hit_status = ( line.strip().split().index(field) for field in ["hit_id", "gene_name", "hit_status"] - ] + ) else: fields = line.strip().split() gene_results.setdefault(fields[acc_index], dict()) diff --git a/bin/process_unifire_output.py b/bin/process_unifire_output.py index b4df861f..e09be9c2 100755 --- a/bin/process_unifire_output.py +++ b/bin/process_unifire_output.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright 2023 EMBL - European Bioinformatics Institute +# Copyright 2023-2024 EMBL - European Bioinformatics Institute # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -67,7 +66,7 @@ def combine_and_print(arba_dict, unirule_dict, pirsr_dict, gff, outfile): fasta_flag = False list_of_dicts = [arba_dict, unirule_dict, pirsr_dict] - with open(outfile, "w") as file_out, open(gff, "r") as file_in: + with open(outfile, "w") as file_out, open(gff) as file_in: writer = csv.writer(file_out, delimiter="\t") for line in file_in: if fasta_flag is True: @@ -135,7 +134,7 @@ def escape_reserved_characters(combined_dict): for ch in reserved_characters: if ch in value: changes_flag = True - value = value.replace(ch, "\{}".format(ch)) + value = value.replace(ch, f"\{ch}") if changes_flag: remove_values.append(old_value) add_values.append(value) @@ -165,7 +164,7 @@ def get_id(col9): def load_pirsr(pirsr): results_dict = dict() - with open(pirsr, "r") as file_in: + with open(pirsr) as file_in: for line in file_in: if not line.startswith("Evidence"): if any(keyword in line for keyword in ["keyword", "comment.cofactor"]): @@ -203,7 +202,7 @@ def load_pirsr(pirsr): def load_unirule_arba(file): results_dict = dict() - with open(file, "r") as file_in: + with open(file) as file_in: for line in file_in: if not line.startswith("Evidence"): parts = line.strip().split("\t") diff --git a/conf/base.config b/conf/base.config index f3db5c58..e87751fe 100644 --- a/conf/base.config +++ b/conf/base.config @@ -67,6 +67,10 @@ process { withName: GECCO_RUN { memory = { check_max( 2.GB * task.attempt, 'memory' ) } } + withName: ANTISMASH_GETDB { + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + } withName: SANNTIS { cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 2.GB * task.attempt, 'memory' ) } @@ -91,4 +95,10 @@ process { cpus = { check_max( 8 * task.attempt, 'cpus' ) } memory = { check_max( 10.GB * task.attempt, 'memory' ) } } + withName: PSEUDOFINDER { + cpus = { check_max( 8 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + errorStrategy = 'retry' + maxRetries = 3 + } } diff --git a/conf/ebi_codon.config b/conf/ebi_codon.config index 7224d68d..e102c860 100644 --- a/conf/ebi_codon.config +++ b/conf/ebi_codon.config @@ -16,4 +16,6 @@ params { dbcan_db = "/hps/nobackup/rdf/metagenomics/service-team/ref-dbs/dbcan/4.1.3-V12" bakta_db = "/hps/nobackup/rdf/metagenomics/service-team/ref-dbs/bakta/2024-01-19" + + pseudofinder_db = "/hps/nobackup/rdf/metagenomics/service-team/ref-dbs/uniprotkb/2024_06" } diff --git a/conf/embl_hd.config b/conf/embl_hd.config new file mode 100644 index 00000000..040f9a07 --- /dev/null +++ b/conf/embl_hd.config @@ -0,0 +1,24 @@ +apptainer { + enabled = true + autoMounts = true + ociAutoPull = false // Disabled this as it's causing users' home folders to run out of storage + pullTimeout = "3 hours" // The default is 20 minutes and fails with large images + envWhitelist = 'CUDA_VISIBLE_DEVICES' // Allow binding of the GPU visible devices variable into the containers + cacheDir = params.singularity_cachedir ?: System.getenv('SINGULARITY_CACHEDIR') ?: System.getenv('NFX_SINGULARITY_CACHEDIR') +} + +process { + maxRetries = 1 + cache = 'lenient' + afterScript = 'sleep 10' + queue = { (task.time <= 14.d && task.memory < 256.GB && (task.memory.div(task.cpus)) <= 4.GB) ?
"htc-el8" : "bigmem" } + scratch = false +} + +executor { + name = "slurm" + queueSize = 200 + submitRateLimit = "10/1sec" + pollInterval = '10sec' + exitReadTimeout = "5 min" +} diff --git a/conf/modules.config b/conf/modules.config index 918b3d63..bd445502 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -181,7 +181,15 @@ process { ] } - withName: PROKKA { + withName: PSEUDOFINDER_POSTPROCESSING { + publishDir = [ + path: { "${params.outdir}/${meta.prefix}/functional_annotation/pseudofinder" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: "PROKKA_STANDARD" { publishDir = [ path: { "${params.outdir}/${meta.prefix}/functional_annotation/prokka" }, mode: params.publish_dir_mode, diff --git a/conf/test.config b/conf/test.config index 193d5f89..43493b21 100644 --- a/conf/test.config +++ b/conf/test.config @@ -26,4 +26,8 @@ params { antismash_db = "/hps/nobackup/rdf/metagenomics/service-team/ref-dbs/antismash/7.1.x" dbcan_db = "/hps/nobackup/rdf/metagenomics/service-team/ref-dbs/dbcan/4.1.3-V12" + + bakta_db = "/hps/nobackup/rdf/metagenomics/service-team/ref-dbs/bakta/2024-01-19" + + pseudofinder_db = "/hps/nobackup/rdf/metagenomics/service-team/ref-dbs/uniprotkb/2024_06/uniprot_sprot_2024_06.fasta" } diff --git a/media/mettannotator-schema.png b/media/mettannotator-schema.png index 4f3f9169..3bfae495 100644 Binary files a/media/mettannotator-schema.png and b/media/mettannotator-schema.png differ diff --git a/modules/local/amrfinder_plus.nf b/modules/local/amrfinder_plus.nf index 7d8d1832..ca61d7ce 100644 --- a/modules/local/amrfinder_plus.nf +++ b/modules/local/amrfinder_plus.nf @@ -14,6 +14,10 @@ process AMRFINDER_PLUS { script: """ + # this is needed as some environments are very picky with the TMP dir + export TMPDIR="\$PWD/tmp" + mkdir -p "\$PWD/tmp" + amrfinder --plus \ -n ${fna} \ -p ${faa} \ diff --git a/modules/local/amrfinder_plus_getdb.nf b/modules/local/amrfinder_plus_getdb.nf index 42bc9712..88fe885f 100644 --- a/modules/local/amrfinder_plus_getdb.nf +++ b/modules/local/amrfinder_plus_getdb.nf @@ -3,7 +3,12 @@ process AMRFINDER_PLUS_GETDB { tag "AMR Finder DB 2024-01-31.1" - container 'quay.io/biocontainers/ncbi-amrfinderplus:3.12.8--h283d18e_0' + container "${ workflow.containerEngine in ['singularity', 'apptainer'] ? + 'https://depot.galaxyproject.org/singularity/ncbi-amrfinderplus:3.12.8--h283d18e_0' : + 'biocontainers/ncbi-amrfinderplus:3.12.8--h283d18e_0' }" + + // amrfinder_index won't work if singularity mounts the workdir + scratch false publishDir "$params.dbs", mode: 'copy' @@ -12,6 +17,9 @@ process AMRFINDER_PLUS_GETDB { script: """ + export TMPDIR="\$PWD/tmp" + mkdir -p "\$PWD/tmp" + wget -r -nH --cut-dirs=5 \\ ftp://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.12/2024-01-31.1/ diff --git a/modules/local/annotate_gff.nf b/modules/local/annotate_gff.nf index ead2aba7..67728234 100644 --- a/modules/local/annotate_gff.nf +++ b/modules/local/annotate_gff.nf @@ -4,7 +4,9 @@ process ANNOTATE_GFF { label 'process_nano' - container 'quay.io/microbiome-informatics/genomes-pipeline.python3base:v1.1' + container "${ workflow.containerEngine in ['singularity', 'apptainer'] ? 
+ 'https://depot.galaxyproject.org/singularity/python:3.9' : + 'biocontainers/python:3.9' }" input: tuple val(meta), @@ -18,6 +20,7 @@ process ANNOTATE_GFF { file(gecco_gff), file(dbcan_gff), file(df_gff), + file(pseudofinder_gff), file(ips_annotations_tsv), // empty in fast mode file(sanntis_annotations_gff), // empty in fast mode file(arba), // empty in fast mode @@ -28,6 +31,8 @@ process ANNOTATE_GFF { output: tuple val(meta), path("*_annotations.gff"), emit: annotated_gff tuple val(meta), path("*_annotations_with_descriptions.gff"), optional: true, emit: annotated_desc_gff + tuple val(meta), path("*_submission.gff"), emit: submission_gff + tuple val(meta), path("*_pseudogene_report.txt"), emit: pseudogene_report path "versions.yml", emit: versions script: @@ -37,6 +42,7 @@ process ANNOTATE_GFF { def gecco_flag = ""; def dbcan_flag = ""; def df_flag = ""; + def pseudofinder_flag = ""; def ips_flag = ""; def hypothetical_ips_flags = ""; def sanntis_flag = ""; @@ -58,6 +64,9 @@ process ANNOTATE_GFF { if ( df_gff ) { df_flag = "--defense-finder ${df_gff}" } + if ( pseudofinder_gff ) { + pseudofinder_flag = "--pseudofinder ${pseudofinder_gff} --pseudogene-report ${meta.prefix}_pseudogene_report.txt" + } if ( ips_annotations_tsv ) { ips_flag = "-i ${ips_annotations_tsv}" hypothetical_ips_flags = [ @@ -80,7 +89,7 @@ process ANNOTATE_GFF { -r ${ncrna_tsv} \ -t ${trna_gff} \ -o ${meta.prefix}_temp.gff \ - ${crisprcas_flag} ${sanntis_flag} ${amrfinder_flag} \ + ${crisprcas_flag} ${sanntis_flag} ${amrfinder_flag} ${pseudofinder_flag}\ ${antismash_flag} ${gecco_flag} ${dbcan_flag} ${df_flag} ${ips_flag} if [ "${params.fast}" == "false" ]; then @@ -97,6 +106,10 @@ process ANNOTATE_GFF { -i ${hypothetical_tmp_gff} \\ -o ${meta.prefix}_annotations.gff ${hypothetical_ips_flags} + prepare_gff_for_conversion.py \\ + -i ${meta.prefix}_annotations.gff \\ + -o ${meta.prefix}_submission.gff + if [ "${params.fast}" == "false" ]; then add_interpro_descriptions.py \\ --ipr-entries ${interpro_entry_list}/entry.list \\ @@ -113,6 +126,8 @@ process ANNOTATE_GFF { stub: """ touch ${meta.prefix}_annotations.gff + touch ${meta.prefix}_submission.gff + touch ${meta.prefix}_pseudogene_report.txt if [ "${params.fast}" == "false" ]; then touch ${meta.prefix}_annotations_with_descriptions.gff fi diff --git a/modules/local/bakta_getdb.nf b/modules/local/bakta_getdb.nf index bc219d4b..09f34e1c 100644 --- a/modules/local/bakta_getdb.nf +++ b/modules/local/bakta_getdb.nf @@ -3,7 +3,9 @@ process BAKTA_GETDB { tag "BAKTA DB 2024-01-19" - container 'quay.io/biocontainers/ncbi-amrfinderplus:3.12.8--h283d18e_0' + container "${ workflow.containerEngine in ['singularity', 'apptainer'] ? 
+ 'https://depot.galaxyproject.org/singularity/ncbi-amrfinderplus:3.12.8--h283d18e_0' : + 'biocontainers/ncbi-amrfinderplus:3.12.8--h283d18e_0' }" publishDir "$params.dbs", mode: 'copy' @@ -13,11 +15,14 @@ process BAKTA_GETDB { script: """ wget https://zenodo.org/record/10522951/files/db.tar.gz + tar -xzf db.tar.gz rm db.tar.gz mv db bakta + wget -r -nH --cut-dirs=5 \\ ftp://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.12/2024-01-31.1/ + rm -r bakta/amrfinderplus-db/* mv 2024-01-31.1 bakta/amrfinderplus-db/ diff --git a/modules/local/crisprcasfinder.nf b/modules/local/crisprcasfinder.nf index 1f633dde..1174575b 100644 --- a/modules/local/crisprcasfinder.nf +++ b/modules/local/crisprcasfinder.nf @@ -4,7 +4,7 @@ process CRISPRCAS_FINDER { label 'process_nano' - container 'quay.io/microbiome-informatics/genomes-pipeline.crisprcasfinder:4.3.2' + container 'quay.io/microbiome-informatics/genomes-pipeline.crisprcasfinder:4.3.2patchedv5' input: tuple val(meta), path(fasta) diff --git a/modules/local/dbcan.nf b/modules/local/dbcan.nf index ee303145..87976458 100644 --- a/modules/local/dbcan.nf +++ b/modules/local/dbcan.nf @@ -2,7 +2,9 @@ process DBCAN { tag "${meta.prefix}" - container 'quay.io/biocontainers/dbcan:4.1.2--pyhdfd78af_0' + container "${ workflow.containerEngine in ['singularity', 'apptainer'] ? + 'https://depot.galaxyproject.org/singularity/dbcan:4.1.2--pyhdfd78af_0' : + 'biocontainers/dbcan:4.1.2--pyhdfd78af_0' }" input: tuple val(meta), path(faa), path(gff) diff --git a/modules/local/dbcan_getdb.nf b/modules/local/dbcan_getdb.nf index 7690232e..3bf7de33 100644 --- a/modules/local/dbcan_getdb.nf +++ b/modules/local/dbcan_getdb.nf @@ -2,7 +2,9 @@ process DBCAN_GETDB { tag "DBCan 4.1.3_V12" - container 'quay.io/biocontainers/gnu-wget:1.18--h36e9172_9' + container "${ workflow.containerEngine in ['singularity', 'apptainer'] ? + 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h36e9172_9' : + 'biocontainers/gnu-wget:1.18--h36e9172_9' }" publishDir "${params.dbs}", mode: 'copy' diff --git a/modules/local/defense_finder.nf b/modules/local/defense_finder.nf index 3f9f27bb..f7e7363c 100644 --- a/modules/local/defense_finder.nf +++ b/modules/local/defense_finder.nf @@ -2,7 +2,9 @@ process DEFENSE_FINDER { tag "${meta.prefix}" - container 'biocontainers/defense-finder:1.2.0--pyhdfd78af_0' + container "${ workflow.containerEngine in ['singularity', 'apptainer'] ? + 'https://depot.galaxyproject.org/singularity/defense-finder:1.2.0--pyhdfd78af_0' : + 'biocontainers/defense-finder:1.2.0--pyhdfd78af_0' }" input: tuple val(meta), path(faa), path(prokka_gff) diff --git a/modules/local/defense_finder_getdb.nf b/modules/local/defense_finder_getdb.nf index 1af0e5cd..73df9fd9 100644 --- a/modules/local/defense_finder_getdb.nf +++ b/modules/local/defense_finder_getdb.nf @@ -3,7 +3,9 @@ process DEFENSE_FINDER_GETDB { tag "Defense Finder Models 1.2.3" - container 'quay.io/biocontainers/gnu-wget:1.18--h36e9172_9' + container "${ workflow.containerEngine in ['singularity', 'apptainer'] ? 
+ 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h36e9172_9' : + 'biocontainers/gnu-wget:1.18--h36e9172_9' }" publishDir "$params.dbs", mode: 'copy' diff --git a/modules/local/detect_ncrna.nf b/modules/local/detect_ncrna.nf index adb3199b..edca4895 100644 --- a/modules/local/detect_ncrna.nf +++ b/modules/local/detect_ncrna.nf @@ -1,6 +1,8 @@ process DETECT_NCRNA { + tag "${meta.prefix}" + container 'quay.io/microbiome-informatics/genomes-pipeline.detect_rrna:v3.2' input: diff --git a/modules/local/detect_trna.nf b/modules/local/detect_trna.nf index 9303ce19..8e233b9d 100644 --- a/modules/local/detect_trna.nf +++ b/modules/local/detect_trna.nf @@ -25,18 +25,19 @@ process DETECT_TRNA { # tRNAscan-SE needs a tmp folder otherwise it will use the base TMPDIR (with no subfolder) # and that causes issues as other detect_trna process will crash when the files are cleaned - PROCESSTMP="\$(mktemp -d)" + PROCESSTMP="\$PWD/tmp" + mkdir -p "\$PWD/tmp" export TMPDIR="\${PROCESSTMP}" - # bash trap to clean the tmp directory - trap 'rm -r -- "\${PROCESSTMP}"' EXIT echo "[ Detecting tRNAs ]" tRNAscan-SE -${detected_kingdom} -Q \ -m ${meta.prefix}_stats.out \ -o ${meta.prefix}_trna.out \ - --gff ${meta.prefix}_trna.gff \ + --gff ${meta.prefix}_trna_temp.gff \ ${fasta} + add_locus_tag_to_trna.py -i ${meta.prefix}_trna_temp.gff -o ${meta.prefix}_trna.gff + parse_tRNA.py -i ${meta.prefix}_stats.out -o ${meta.prefix}_tRNA_20aa.out echo "Completed" diff --git a/modules/local/eggnog.nf b/modules/local/eggnog.nf index 04fee25b..c8418bce 100644 --- a/modules/local/eggnog.nf +++ b/modules/local/eggnog.nf @@ -52,7 +52,8 @@ process EGGNOG_MAPPER { --cpu ${task.cpus} \ --tax_scope 'prokaryota_broad' \ --annotate_hits_table ${annotation_hit_table} ${db_mem_flag} \ - -o ${meta.prefix} + -o ${meta.prefix} \ + --override cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/eggnog_getdb.nf b/modules/local/eggnog_getdb.nf index 3c72ef6c..be7a39a9 100644 --- a/modules/local/eggnog_getdb.nf +++ b/modules/local/eggnog_getdb.nf @@ -2,7 +2,9 @@ process EGGNOG_MAPPER_GETDB { tag "EGGNOG Mapper DB 5.0.2" - container "quay.io/biocontainers/gnu-wget:1.18--h36e9172_9" + container "${ workflow.containerEngine in ['singularity', 'apptainer'] ? + 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h36e9172_9' : + 'biocontainers/gnu-wget:1.18--h36e9172_9' }" publishDir "${params.dbs}", mode: "copy" diff --git a/modules/local/interpro_list_getdb.nf b/modules/local/interpro_list_getdb.nf index d8d25992..6b5a56a5 100644 --- a/modules/local/interpro_list_getdb.nf +++ b/modules/local/interpro_list_getdb.nf @@ -2,7 +2,9 @@ process INTEPRO_ENTRY_LIST_GETDB { tag "InterPro Entry List 94.0" - container 'quay.io/biocontainers/gnu-wget:1.18--h36e9172_9' + container "${ workflow.containerEngine in ['singularity', 'apptainer'] ? 
+ 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h36e9172_9' : + 'biocontainers/gnu-wget:1.18--h36e9172_9' }" publishDir "${params.dbs}", mode: 'copy' diff --git a/modules/local/interproscan.nf b/modules/local/interproscan.nf index 7795eec8..6768f81b 100644 --- a/modules/local/interproscan.nf +++ b/modules/local/interproscan.nf @@ -8,7 +8,7 @@ process INTERPROSCAN { container 'quay.io/microbiome-informatics/genomes-pipeline.ips:5.62-94.0' containerOptions { - if (workflow.containerEngine == 'singularity') { + if (workflow.containerEngine in ['singularity', 'apptainer']) { return "--bind ${interproscan_db}/data:/opt/interproscan-5.62-94.0/data" } else { return "-v ./${interproscan_db}/data:/opt/interproscan-5.62-94.0/data" diff --git a/modules/local/interproscan_getdb.nf b/modules/local/interproscan_getdb.nf index 895ea6c1..08032200 100644 --- a/modules/local/interproscan_getdb.nf +++ b/modules/local/interproscan_getdb.nf @@ -2,7 +2,9 @@ process INTEPROSCAN_GETDB { tag "IPRS Scan 5.62-94.0" - container 'quay.io/biocontainers/gnu-wget:1.18--h36e9172_9' + container "${ workflow.containerEngine in ['singularity', 'apptainer'] ? + 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h36e9172_9' : + 'biocontainers/gnu-wget:1.18--h36e9172_9' }" publishDir "${params.dbs}", mode: 'copy' diff --git a/modules/local/lookup_kingdom.nf b/modules/local/lookup_kingdom.nf index 4648331e..6e2a7972 100644 --- a/modules/local/lookup_kingdom.nf +++ b/modules/local/lookup_kingdom.nf @@ -4,14 +4,16 @@ process LOOKUP_KINGDOM { label 'process_nano' - container 'quay.io/microbiome-informatics/genomes-pipeline.python3base:v1.1' + container "${ workflow.containerEngine in ['singularity', 'apptainer'] ? + 'oras://community.wave.seqera.io/library/pip_requests_retry:4b6e4901d175ea72' : + 'community.wave.seqera.io/library/pip_requests_retry:d1a6734506332b90' }" input: tuple val(meta), path(fasta) output: tuple val(meta), env(detected_kingdom), emit: detected_kingdom - path "versions.yml", emit: versions + path "versions.yml", emit: versions script: """ diff --git a/modules/local/prokka.nf b/modules/local/prokka.nf index 8f9b1f09..2825a00e 100644 --- a/modules/local/prokka.nf +++ b/modules/local/prokka.nf @@ -2,10 +2,13 @@ process PROKKA { tag "${meta.prefix}" - container "quay.io/biocontainers/prokka:1.14.6--pl526_0" + container "${ workflow.containerEngine in ['singularity', 'apptainer'] ? 
+        'https://depot.galaxyproject.org/singularity/prokka:1.14.6--pl526_0' :
+        'biocontainers/prokka:1.14.6--pl526_0' }"

     input:
     tuple val(meta), path(fasta), val(detected_kingdom)
+    val(mode) // standard or compliant

     output:
     tuple val(meta), file("${meta.prefix}_prokka/${meta.prefix}.gff"), emit: gff
@@ -17,7 +20,20 @@
     path "versions.yml" , emit: versions

     script:
+    def compliant_flag = ""
+    def rfam_flag = ""
+    if (mode == "compliant") {
+        compliant_flag = "--compliant"
+        rfam_flag = "--rfam"
+    }
     """
+    # TMP folder issues in Prokka - https://github.com/tseemann/prokka/issues/402
+    export TMPDIR="\$PWD/tmp"
+    mkdir -p "\$PWD/tmp"
+
+    # Disable the Java VM performance-data gathering tool, for improved performance
+    export JAVA_TOOL_OPTIONS="-XX:-UsePerfData"
+
     cat ${fasta} | tr '-' ' ' > ${meta.prefix}_cleaned.fasta

     prokka ${meta.prefix}_cleaned.fasta \
@@ -26,7 +42,9 @@
         --outdir ${meta.prefix}_prokka \
         --prefix ${meta.prefix} \
         --force \
-        --locustag ${meta.prefix}
+        --locustag ${meta.prefix} \
+        ${compliant_flag} \
+        ${rfam_flag}

     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/local/pseudofinder.nf b/modules/local/pseudofinder.nf
new file mode 100644
index 00000000..02f2e2e6
--- /dev/null
+++ b/modules/local/pseudofinder.nf
@@ -0,0 +1,60 @@
+process PSEUDOFINDER {
+
+    tag "${meta.prefix}"
+
+    container 'quay.io/microbiome-informatics/pseudofinder:1.1.0'
+
+    input:
+    tuple val(meta), path(compliant_gbk)
+    tuple path(pseudofinder_db), val(db_version)
+
+    output:
+    tuple val(meta), file("${meta.prefix}_pseudos.gff"), emit: pseudofinder_gff
+    path "versions.yml" , emit: versions
+
+    script:
+    """
+    pseudofinder.py annotate \
+        -g ${compliant_gbk} \
+        -db ${pseudofinder_db}/uniprot_sprot.fasta \
+        -op ${meta.prefix} \
+        -t ${task.cpus} \
+        --diamond
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        Pseudofinder: 1.1.0
+        Swiss-Prot: $db_version
+    END_VERSIONS
+    """
+}
+
+process PSEUDOFINDER_POSTPROCESSING {
+
+    tag "${meta.prefix}"
+
+    label 'process_nano'
+
+    container 'quay.io/microbiome-informatics/genomes-pipeline.python3base:v1.1'
+
+    input:
+    tuple val(meta), path(annotations_gff), path(compliant_gff, stageAs: "compliant/*"), path(pseudofinder_gff)
+
+    output:
+    tuple val(meta), file("${meta.prefix}_processed_pseudogenes.gff"), emit: pseudofinder_processed_gff
+    path "versions.yml" , emit: versions
+
+    script:
+    """
+    adjust_pseudofinder_output.py \
+        --pseudofinder-output ${pseudofinder_gff} \
+        --standard-gff ${annotations_gff} \
+        --compliant-gff ${compliant_gff} \
+        -o ${meta.prefix}_processed_pseudogenes.gff
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version 2>&1 | sed 's/Python //g')
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/pseudofinder_getdb.nf b/modules/local/pseudofinder_getdb.nf
new file mode 100644
index 00000000..5544102d
--- /dev/null
+++ b/modules/local/pseudofinder_getdb.nf
@@ -0,0 +1,24 @@
+process PSEUDOFINDER_GETDB {
+
+    tag "Pseudofinder DB 2024_06"
+
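+    // Pull the pre-built Singularity image from the Galaxy depot when running under
+    // Singularity or Apptainer, and fall back to the Docker biocontainer otherwise.
+    container "${ workflow.containerEngine in ['singularity', 'apptainer'] ?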
+ 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h36e9172_9' : + 'biocontainers/gnu-wget:1.18--h36e9172_9' }" + + publishDir "${params.dbs}", mode: "copy" + + output: + tuple path("uniprot_sprot"), env("VERSION"), emit: pseudofinder_db + + script: + """ + wget ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipelines/tool-dbs/pseudofinder/uniprot_sprot_2024_06.tar.gz + + mkdir -p uniprot_sprot + + tar -zxvf uniprot_sprot_2024_06.tar.gz -C uniprot_sprot + + VERSION=\$(cat uniprot_sprot/VERSION.txt) + """ +} diff --git a/modules/local/rfam_getmodels.nf b/modules/local/rfam_getmodels.nf index 1bfbd80f..6fbedb55 100644 --- a/modules/local/rfam_getmodels.nf +++ b/modules/local/rfam_getmodels.nf @@ -3,7 +3,9 @@ process RFAM_GETMODELS { tag "Rfam models - release 14.9" - container "quay.io/biocontainers/gnu-wget:1.18--h36e9172_9" + container "${ workflow.containerEngine in ['singularity', 'apptainer'] ? + 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h36e9172_9' : + 'biocontainers/gnu-wget:1.18--h36e9172_9' }" publishDir "$params.dbs/rfam_models", mode: 'copy' diff --git a/modules/local/unifire.nf b/modules/local/unifire.nf index cacb401f..7c8d2313 100644 --- a/modules/local/unifire.nf +++ b/modules/local/unifire.nf @@ -7,7 +7,7 @@ process UNIFIRE { container "dockerhub.ebi.ac.uk/uniprot-public/unifire:2023.4" containerOptions { - if (workflow.containerEngine == 'singularity') { + if (workflow.containerEngine in ['singularity', 'apptainer']) { return "--bind unifire:/volume" } else { return "-v ./unifire:/volume" diff --git a/modules/nf-core/bakta/bakta/bakta-bakta.diff b/modules/nf-core/bakta/bakta/bakta-bakta.diff index 0d2045fa..80947870 100644 --- a/modules/nf-core/bakta/bakta/bakta-bakta.diff +++ b/modules/nf-core/bakta/bakta/bakta-bakta.diff @@ -15,11 +15,11 @@ Changes in module 'nf-core/bakta/bakta' - tag "$meta.id" + tag "$meta.prefix" label 'process_medium' - + conda "${moduleDir}/environment.yml" @@ -8,22 +8,22 @@ 'biocontainers/bakta:1.9.3--pyhdfd78af_0' }" - + input: - tuple val(meta), path(fasta) - path db @@ -27,7 +27,7 @@ Changes in module 'nf-core/bakta/bakta' - path prodigal_tf + tuple val(meta), path(fasta), path(detected_kingdom) + tuple path(db), val(db_version) - + output: tuple val(meta), path("${prefix}.embl") , emit: embl tuple val(meta), path("${prefix}.faa") , emit: faa @@ -43,10 +43,10 @@ Changes in module 'nf-core/bakta/bakta' + tuple val(meta), path("${prefix}.svg") , emit: svg + tuple val(meta), path("${prefix}.png") , emit: png path "versions.yml" , emit: versions - + when: @@ -31,18 +31,22 @@ - + script: def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" @@ -71,7 +71,7 @@ Changes in module 'nf-core/bakta/bakta' + --skip-ncrna \\ + --skip-ncrna-region \\ + --skip-crispr - + cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bakta/bakta/tests/main.nf.test b/modules/nf-core/bakta/bakta/tests/main.nf.test index bdceb16e..9ee7ed8c 100644 --- a/modules/nf-core/bakta/bakta/tests/main.nf.test +++ b/modules/nf-core/bakta/bakta/tests/main.nf.test @@ -25,7 +25,7 @@ nextflow_process { when { process { - """ + """ input[0] = [ [ id:'test', single_end:false ], // meta map file(params.test_data['bacteroides_fragilis']['illumina']['test1_contigs_fa_gz'], checkIfExists: true) @@ -62,7 +62,7 @@ nextflow_process { when { process { - """ + """ input[0] = [[id: 'stub'],file('stub')] input[1] = [] input[2] = [] diff --git a/modules/nf-core/bakta/bakta/tests/main.nf.test.snap 
b/modules/nf-core/bakta/bakta/tests/main.nf.test.snap index 40e30c36..4b4c50a7 100644 --- a/modules/nf-core/bakta/bakta/tests/main.nf.test.snap +++ b/modules/nf-core/bakta/bakta/tests/main.nf.test.snap @@ -188,4 +188,4 @@ }, "timestamp": "2024-03-14T09:11:15.532858932" } -} \ No newline at end of file +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index da033408..7ee62bfe 100755 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -4,10 +4,11 @@ """Provide functions to merge multiple versions.yml files.""" -import yaml import platform from textwrap import dedent +import yaml + def _make_versions_html(versions): """Generate a tabular HTML output of all versions for MultiQC.""" @@ -58,7 +59,9 @@ def main(): } with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + versions_by_process = ( + yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + ) # aggregate versions by the module name (derived from fully-qualified process name) versions_by_module = {} diff --git a/modules/nf-core/gecco/run/gecco-run.diff b/modules/nf-core/gecco/run/gecco-run.diff index 68bb9873..ec491791 100644 --- a/modules/nf-core/gecco/run/gecco-run.diff +++ b/modules/nf-core/gecco/run/gecco-run.diff @@ -2,8 +2,8 @@ Changes in module 'nf-core/gecco/run' --- modules/nf-core/gecco/run/main.nf +++ modules/nf-core/gecco/run/main.nf @@ -13,12 +13,12 @@ - - + + output: - tuple val(meta), path("*.genes.tsv") , optional: true, emit: genes - tuple val(meta), path("*.features.tsv") , emit: features @@ -17,12 +17,12 @@ Changes in module 'nf-core/gecco/run' + tuple val(meta), path("*clusters*.gff"), optional: true, emit: gff + tuple val(meta), path("*.json"), optional: true, emit: json path "versions.yml" , emit: versions - + when: @@ -39,6 +39,8 @@ $custom_model \\ $custom_hmm - + + gecco convert clusters -i ./ --format gff + cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/quast/quast.diff b/modules/nf-core/quast/quast.diff index 999ae392..705c64e8 100644 --- a/modules/nf-core/quast/quast.diff +++ b/modules/nf-core/quast/quast.diff @@ -7,19 +7,19 @@ Changes in module 'nf-core/quast' - label 'process_medium' + tag "$meta.prefix" + label 'process_single' - + conda "bioconda::quast=5.2.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
 @@ -9,7 +9,6 @@
-
+
  input:
  tuple val(meta) , path(consensus)
 - tuple val(meta2), path(fasta)
  tuple val(meta3), path(gff)
-
+
  output:
 @@ -25,13 +24,12 @@
-
+
  script:
  def args = task.ext.args ?: ''
 - prefix = task.ext.prefix ?: "${meta.id}"
@@ -35,7 +35,7 @@ Changes in module 'nf-core/quast'
      --threads $task.cpus \\
      $args \\
 @@ -50,10 +48,9 @@
-
+
  stub:
  def args = task.ext.args ?: ''
 - prefix = task.ext.prefix ?: "${meta.id}"
diff --git a/nextflow.config b/nextflow.config
index 14cf6346..9b6ba819 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -45,8 +45,11 @@ params {
     bakta_db = null
     bakta_db_version = "2024-01-19"
+    pseudofinder_db = null
+    pseudofinder_db_version = "2024_06"
+
     // Tool options
-    bakta = false
+    bakta = false

     // MultiQC options
     multiqc_config = null
@@ -72,6 +75,9 @@ params {
     max_cpus = 16
     max_time = '240.h'

+    // Each profile is responsible for using this setting
+    singularity_cachedir = ''
+
     // Schema validation default options
     validationFailUnrecognisedParams = false
     validationLenientMode = false
@@ -173,7 +179,7 @@ profiles {
         executor.cpus = 16
         executor.memory = 60.GB
     }
-    test { includeConfig 'conf/test.config' }
+    test { includeConfig 'conf/test.config' }

     local {
         params {
@@ -228,6 +234,11 @@ profiles {
             pollInterval = "10 sec"
         }
     }
+
+    // EMBL Heidelberg
+    embl_hd {
+        includeConfig 'conf/embl_hd.config'
+    }
 }

 // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
@@ -282,7 +293,7 @@ manifest {
     description = """ME TT assembly annotation pipeline"""
     mainScript = 'main.nf'
     nextflowVersion = '!>=23.04.0'
-    version = '1.3'
+    version = '1.4.0'
     doi = 'https://doi.org/10.1101/2024.07.11.603040'
 }
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 54f821d6..16e27e54 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -1,5 +1,5 @@
 {
-    "$schema": "http://json-schema.org/draft-07/schema",
+    "$schema": "https://json-schema.org/draft-07/schema",
     "$id": "https://raw.githubusercontent.com/ebi-metagenomics/mettannotator/master/nextflow_schema.json",
     "title": "ebi-metagenomics/mettannotator pipeline parameters",
     "description": "ME TT assembly annotation pipeline",
@@ -36,8 +36,8 @@
         },
         "bakta": {
             "type": "boolean",
-            "default": false,
-            "description": "Use Bakta instead of Prokka for CDS annotation. Prokka will still be used for archaeal genomes."
+            "description": "Use Bakta instead of Prokka for CDS annotation. Prokka will still be used for archaeal genomes.",
+            "fa_icon": "fas fa-wrench"
         },
         "email": {
             "type": "string",
@@ -164,6 +164,17 @@
             "type": "string",
             "default": "2024-01-19",
             "description": "The Bakta reference database version."
+        },
+        "pseudofinder_db": {
+            "type": "string",
+            "format": "directory-path",
+            "description": "Pseudofinder reference database. Mettannotator uses Swiss-Prot as the database for Pseudofinder.",
+            "help_text": "Set this variable to the path of the folder containing the uniprot_sprot.fasta file to be used as the database."
+        },
+        "pseudofinder_db_version": {
+            "type": "string",
+            "default": "2024_06",
+            "description": "Swiss-Prot version."
} }, "fa_icon": "fas fa-database" @@ -310,6 +321,11 @@ "type": "string", "description": "Custom MultiQC yaml file containing HTML including a methods description.", "fa_icon": "fas fa-cog" + }, + "singularity_cachedir": { + "type": "string", + "fa_icon": "fab fa-docker", + "description": "The singularity/apptainer cache directory" } } } diff --git a/postprocessing/add_mobilome_to_gff.py b/postprocessing/add_mobilome_to_gff.py index aaa6ecec..cd706f8c 100644 --- a/postprocessing/add_mobilome_to_gff.py +++ b/postprocessing/add_mobilome_to_gff.py @@ -1,5 +1,19 @@ #!/usr/bin/env python3 +# Copyright 2023-2024 EMBL - European Bioinformatics Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import argparse import re @@ -10,7 +24,7 @@ def main(mobilome_file, infile, outfile): list() ) # record which mobilome lines have already been printed to the output file fasta_flag = False - with open(infile, "r") as file_in, open(outfile, "w") as file_out: + with open(infile) as file_in, open(outfile, "w") as file_out: previous_contig = "" for line in file_in: if line.startswith("#"): @@ -107,9 +121,7 @@ def sanity_check(mobilome_dict, printed_list): mobilome_count = sum(len(inner_dict) for inner_dict in mobilome_dict.values()) assert ( printed_list_length == mobilome_count - ), "The number of mobilome entries added to the GFF does not match the expected count: added {}, expected{}".format( - printed_list_length, mobilome_count - ) + ), f"The number of mobilome entries added to the GFF does not match the expected count: added {printed_list_length}, expected{mobilome_count}" def look_for_lines_to_print(mobilome_dict, contig, start, printed_list): @@ -154,7 +166,7 @@ def check_overlap(dictionary, sequence, start, end): def load_mobilome(infile): mobilome_dict = dict() - with open(infile, "r") as file_in: + with open(infile) as file_in: for line in file_in: if line.startswith("#"): if line.startswith("##FASTA"): diff --git a/postprocessing/duplicate_resolution.py b/postprocessing/duplicate_resolution.py index d247fbac..baf0422a 100755 --- a/postprocessing/duplicate_resolution.py +++ b/postprocessing/duplicate_resolution.py @@ -1,5 +1,19 @@ #!/usr/bin/env python3 +# Copyright 2023-2024 EMBL - European Bioinformatics Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + import argparse import logging import re @@ -27,14 +41,14 @@ def main(reference, target, outfile, species): replacements = dict() # gene names to change replacements_ids = dict() # changes to make if alias is None reverse = list() # undo some planned changes (remove these from replacements) - stats_out = open("{}_stats.txt".format(species), "w") + stats_out = open(f"{species}_stats.txt", "w") stats_out.write("Gene\tCopy number\tReplaced\tWhy not replaced/ Comment\n") for base in dedupl_dict: if ( len(dedupl_dict[base]) > 1 ): # there are multiple gene copies, try to deduplicate printed_stat = "" - printed_stat += "{}\t{}\t".format(base, str(len(dedupl_dict[base]))) + printed_stat += f"{base}\t{str(len(dedupl_dict[base]))}\t" counter += 1 unknown_counter = 0 decision_dict = dict() @@ -55,7 +69,7 @@ def main(reference, target, outfile, species): # Decision dictionary has the following format (2 examples here): # {'fabHA': ['BACUNI_03004', 'BACUNI_04476']} # {'der': ['BACUNI_03006'], 'feoB': ['BACUNI_04461'], 'hydF': ['BACUNI_02997']}} - logging.debug("Decision dictionary: {}".format(decision_dict)) + logging.debug(f"Decision dictionary: {decision_dict}") if len(decision_dict) == 0: # unable to resolve duplicates, leave records as they are stats_dict["unknowns_only"] = stats_dict.get("unknowns_only", 0) + 1 @@ -78,18 +92,14 @@ def main(reference, target, outfile, species): else: if alias in replacements: sys.exit( - "Error: something went wrong, alias {} is already in replacements".format( - alias - ) + f"Error: something went wrong, alias {alias} is already in replacements" ) replacements[alias] = base stats_dict["replaced"] = stats_dict.get("replaced", 0) + 1 replace = True printed_stat += "Yes\t\n" else: - logging.debug( - "Case is not clear {} {}".format(dedupl_dict[base], decision_dict) - ) + logging.debug(f"Case is not clear {dedupl_dict[base]} {decision_dict}") unique = check_value_uniqueness( decision_dict ) # check that the alias doesn't repeat @@ -140,10 +150,10 @@ def main(reference, target, outfile, species): if alias not in replacements: replacements[alias] = gene_name else: - ("Alias {} is already in replacements".format(alias)) + (f"Alias {alias} is already in replacements") sys.exit() - logging.debug("Replaced one for one {}".format(decision_dict)) + logging.debug(f"Replaced one for one {decision_dict}") stats_dict["replaced"] = stats_dict.get("replaced", 0) + 1 replace = True printed_stat += "Yes\t\n" @@ -192,14 +202,12 @@ def main(reference, target, outfile, species): ) if made_replacements != (len(replacements) + len(replacements_ids)): sys.exit( - "Made {} replacements but expected {}".format( - str(made_replacements), str(len(replacements)) - ) + f"Made {str(made_replacements)} replacements but expected {str(len(replacements))}" ) - logging.info("Total number of groups: {}".format(counter)) - logging.info("Replacements: {}".format(replacements)) - logging.info("Made replacements: {}".format(made_replacements)) - logging.info("Reverse {}".format(reverse)) + logging.info(f"Total number of groups: {counter}") + logging.info(f"Replacements: {replacements}") + logging.info(f"Made replacements: {made_replacements}") + logging.info(f"Reverse {reverse}") stats_out.close() return stats_dict @@ -234,7 +242,7 @@ def try_to_remove_more_underscores( "remaining after other replacements; no stable ID was assigned to this gene\n", ).format(key) return replacements, printed_stat, replacements_ids - if not deduplication_section[key]["alias"] in bacunis_already_replaced: + if 
deduplication_section[key]["alias"] not in bacunis_already_replaced: replacements[deduplication_section[key]["alias"]] = key.split("_")[ 0 ] @@ -249,8 +257,8 @@ def try_to_remove_more_underscores( def make_replacement_file(target, outfile, replacements, replacements_ids, species): seq_flag = False count_replacements = list() - rep_out = open("{}_replacements.txt".format(species), "w") - with open(target, "r") as file_in, open(outfile, "w") as file_out: + rep_out = open(f"{species}_replacements.txt", "w") + with open(target) as file_in, open(outfile, "w") as file_out: for line in file_in: if line.startswith("#"): if line.startswith("##FASTA"): @@ -266,7 +274,7 @@ def make_replacement_file(target, outfile, replacements, replacements_ids, speci alias_pattern = r";Alias=(.*?);" try: alias_name = re.search(alias_pattern, fields[8]).group(1) - except: + except: # noqa: E722 alias_name = None gene_alias_name = alias_name gene_id = id @@ -274,26 +282,24 @@ def make_replacement_file(target, outfile, replacements, replacements_ids, speci gene_pattern = r";gene=(.*?);" try: gene_name = re.search(gene_pattern, fields[8]).group(1) - except: + except: # noqa E722 gene_name = None if fields[2] in ["CDS", "mRNA", "exon"]: alias_name = gene_alias_name id = gene_id if alias_name and alias_name in replacements: if fields[2] == "gene": - rep_out.write( - "{}\t{}\n".format(gene_name, replacements[alias_name]) - ) + rep_out.write(f"{gene_name}\t{replacements[alias_name]}\n") line = line.replace(gene_name, replacements[alias_name]) count_replacements.append(alias_name) - print("ID is {}".format(id)) + print(f"ID is {id}") if id in replacements_ids: print("ID IS IN IDS") if fields[2] == "gene": - rep_out.write( - "{}\t{}\n".format(gene_name, replacements_ids[id]) + rep_out.write(f"{gene_name}\t{replacements_ids[id]}\n") + print( + "made replacement of", gene_name, replacements_ids[id] ) - print("made replacement of", gene_name, replacements_ids[id]) line = line.replace(gene_name, replacements_ids[id]) count_replacements.append(id) file_out.write(line) @@ -312,9 +318,7 @@ def resolve_duplicate( base, printed_stat, ): - logging.debug( - "=============>RESOLVING {} {}".format(genes_to_resolve, reference_dict) - ) + logging.debug(f"=============>RESOLVING {genes_to_resolve} {reference_dict}") resolved = dict() duplicate_removed = False for gene in reference_dict: @@ -338,7 +342,7 @@ def resolve_duplicate( printed_stat += "Yes\t\n" else: sys.exit("Unknown case") - logging.debug("============>Resolved {}".format(resolved)) + logging.debug(f"============>Resolved {resolved}") return replacements, reverse, stats_dict, printed_stat @@ -368,7 +372,7 @@ def load_duplicates(infile): dedupl_dict = dict() gene_occurrence_counter = dict() alias_repeats = dict() - with open(infile, "r") as file_in: + with open(infile) as file_in: for line in file_in: if not line.startswith("#"): if line.startswith(">"): @@ -380,15 +384,15 @@ def load_duplicates(infile): alias_pattern = r";Alias=(.*?);" try: gene_name = re.search(gene_pattern, annot).group(1) - except: + except: # noqa E722 gene_name = None try: locus_name = re.search(locus_pattern, annot).group(1) - except: + except: # noqa E722 locus_name = None try: alias_name = re.search(alias_pattern, annot).group(1) - except: + except: # noqa E722 alias_name = None if alias_name: alias_repeats[alias_name] = alias_repeats.get(alias_name, 0) + 1 @@ -426,7 +430,7 @@ def get_prefix(species): def load_ref_genenames(reference, species_prefix): alias_genename_dict = dict() - with open(reference, 
"r") as file_in: + with open(reference) as file_in: for line in file_in: if not line.startswith("#"): _, _, feature, _, _, _, _, _, annot = line.strip().split("\t") diff --git a/postprocessing/mob_merger_v2.py b/postprocessing/mob_merger_v2.py index a222843d..f063f8fb 100755 --- a/postprocessing/mob_merger_v2.py +++ b/postprocessing/mob_merger_v2.py @@ -1,15 +1,23 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 + +# Copyright 2023-2024 EMBL - European Bioinformatics Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# ruff: noqa: F841, N806, N803 -import re import argparse -import sys -import os.path - - -##### This script merge the coordinates of predicted MGEs using the Mobilome Annotation Pipeline v2.0 and proMGE v2023 -##### Alejandra Escobar, EMBL-EBI -##### Dec 6, 2023 -##### Apr 12, 2024 -> Fixes on merger function to debug overlapping coordinates of multiple nested MGEs +import re def gff_parser(current_line): @@ -49,7 +57,7 @@ def momo_parser(mobannot): boundaries = ["terminal_inverted_repeat_element", "direct_repeat_element"] - with open(mobannot, "r") as input_file: + with open(mobannot) as input_file: for line in input_file: line = line.rstrip() # Annotation lines have exactly 9 columns @@ -154,7 +162,7 @@ def promge_parser(promge, meta): ("cellular", "cellular_recombinase"), ] mge_desc = {} - with open(meta, "r") as input_meta: + with open(meta) as input_meta: next(input_meta) for line in input_meta: ( @@ -182,7 +190,7 @@ def promge_parser(promge, meta): promge_dict = {} mgeR = {} - with open(promge, "r") as input_gff: + with open(promge) as input_gff: for line in input_gff: line = line.rstrip() # Annotation lines have exactly 9 columns diff --git a/postprocessing/update_gff_with_alias.py b/postprocessing/update_gff_with_alias.py index 4c59e29c..7f1df048 100644 --- a/postprocessing/update_gff_with_alias.py +++ b/postprocessing/update_gff_with_alias.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # Copyright 2024 EMBL - European Bioinformatics Institute # @@ -23,7 +22,7 @@ def main(infile, outfile, liftoff_file, field_from, field_to): alias_dictionary = load_aliases(liftoff_file, field_from) fasta_flag = False fields_to_check = return_fields_to_print_to(field_to) - with open(infile, "r") as file_in, open(outfile, "w") as file_out: + with open(infile) as file_in, open(outfile, "w") as file_out: for line in file_in: if line.startswith("#"): if line.startswith("##FASTA"): @@ -35,14 +34,15 @@ def main(infile, outfile, liftoff_file, field_from, field_to): fields = line.strip().split("\t") if fields[2] in fields_to_check: attributes_dict = dict( - re.split(r"(?= 9 and columns[2] == "CDS": start, end = columns[3], columns[4] essentiality = coordinates_dict.get((start, end)) if essentiality: - columns[8] += f";transit_combined_hmm_gumbel_essentiality={essentiality}" - matched_coordinates.add((start,end)) - updated_lines.append('\t'.join(columns) + '\n') + columns[ + 8 + ] += f";transit_combined_hmm_gumbel_essentiality={essentiality}" + 
matched_coordinates.add((start, end))
+            updated_lines.append("\t".join(columns) + "\n")
         else:
-            updated_lines.append(line)
+            updated_lines.append(line)
     return updated_lines, matched_coordinates

+
 def write_updated_gff(output_file, updated_lines):
-    with open(output_file, 'w') as file:
+    with open(output_file, "w") as file:
         for line in updated_lines:
             file.write(line)

+
 def write_not_matched_file(not_matched_file, coordinates_dict, matched_coordinates):
-    not_matched_lines = [f"{start}\t{end}\t{essentiality}\n" for (start, end), (essentiality) in coordinates_dict.items() if (start, end) not in matched_coordinates]
-    with open(not_matched_file, 'w') as file:
+    not_matched_lines = [
+        f"{start}\t{end}\t{essentiality}\n"
+        for (start, end), (essentiality) in coordinates_dict.items()
+        if (start, end) not in matched_coordinates
+    ]
+    with open(not_matched_file, "w") as file:
         file.writelines(not_matched_lines)

+
 if __name__ == "__main__":
     if len(sys.argv) != 5:
-        print("""
+        print(
+            """
 Usage: python script.py <gff_file> <coordinates_file> <output_file> <not_matched_file>

 Match coordinates from essentiality file, produce modified GFF with new key-value pair and a file containing unmatched essentiality lines.
 The key inserted into the 9th column of CDS lines only is transit_combined_hmm_gumbel_essentiality (all lower case to fit with GFF specs)
 Note that the input is a summary of essentiality data (i.e. medium ignored for now)
-        """)
+        """
+        )
         sys.exit(1)

     gff_file = sys.argv[1]
@@ -54,6 +81,8 @@ def write_not_matched_file(not_matched_file, coordinates_dict, matched_coordinat
     not_matched_file = sys.argv[4]

     coordinates_dict = read_coordinates_file(coordinates_file)
-    updated_lines, matched_coordinates = update_gff_with_status(gff_file, coordinates_dict)
+    updated_lines, matched_coordinates = update_gff_with_status(
+        gff_file, coordinates_dict
+    )
     write_updated_gff(output_file, updated_lines)
     write_not_matched_file(not_matched_file, coordinates_dict, matched_coordinates)
diff --git a/postprocessing/update_gff_with_mapped_uniprot_ids.py b/postprocessing/update_gff_with_mapped_uniprot_ids.py
index b1fdb655..bbf6cb9c 100644
--- a/postprocessing/update_gff_with_mapped_uniprot_ids.py
+++ b/postprocessing/update_gff_with_mapped_uniprot_ids.py
@@ -1,17 +1,31 @@
 #!/usr/bin/env python3
+# Copyright 2023-2024 EMBL - European Bioinformatics Institute
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# + import argparse -import sys import os -import subprocess import re +import subprocess +import sys def load_uniprot_descriptions(uniprot_tsv): uniprot_descriptions = dict() entry_index = None protein_name_index = None - with open(uniprot_tsv, "r") as file_in: + with open(uniprot_tsv) as file_in: for line in file_in: if line.startswith("Entry"): fields = line.strip().split("\t") @@ -24,7 +38,9 @@ def load_uniprot_descriptions(uniprot_tsv): sys.exit("Could not get uniprot indices to load descriptions") else: fields = line.strip().split("\t") - uniprot_descriptions[fields[entry_index]] = escape_reserved_characters(fields[protein_name_index]) + uniprot_descriptions[fields[entry_index]] = escape_reserved_characters( + fields[protein_name_index] + ) assert len(uniprot_descriptions) > 0, "Uniprot description dictionary is empty." return uniprot_descriptions @@ -33,10 +49,10 @@ def escape_reserved_characters(string): reserved_characters = [";", "=", "&"] for ch in reserved_characters: if ch in string: - if ch == ';': + if ch == ";": string = string.replace(ch, "/") else: - string = string.replace(ch, "\{}".format(ch)) + string = string.replace(ch, f"\{ch}") return string @@ -66,7 +82,7 @@ def run_blast(pipeline_fasta, uniprot_fasta, blast_output): def extract_best_hits(blast_output): best_hits = {} - with open(blast_output, "r") as file: + with open(blast_output) as file: for line in file: fields = line.strip().split("\t") query_id = fields[0] @@ -90,13 +106,13 @@ def create_mapping_file(best_hits, mapping_file): def update_gff_with_mapping(gff_file, mapping_file, output_file, uniprot_descriptions): mapping_dict = {} - with open(mapping_file, "r") as file: + with open(mapping_file) as file: next(file) # Skip header for line in file: query, result, _, _ = line.strip().split("\t") mapping_dict[result] = query - with open(gff_file, "r") as infile, open(output_file, "w") as outfile: + with open(gff_file) as infile, open(output_file, "w") as outfile: for line in infile: if line.startswith("#"): outfile.write(line) @@ -116,12 +132,20 @@ def update_gff_with_mapping(gff_file, mapping_file, output_file, uniprot_descrip identifier = identifier[len("CDS:") :] if identifier in mapping_dict: if "Dbxref" in attributes_dict: - attributes_dict["Dbxref"] += f",UniProt:{mapping_dict[identifier]}" + attributes_dict[ + "Dbxref" + ] += f",UniProt:{mapping_dict[identifier]}" else: - attributes_dict["Dbxref"] = f"UniProt:{mapping_dict[identifier]}" + attributes_dict["Dbxref"] = ( + f"UniProt:{mapping_dict[identifier]}" + ) if mapping_dict[identifier] in uniprot_descriptions: - attributes_dict["uniprot_prot_name"] = uniprot_descriptions[mapping_dict[identifier]] - fields[8] = ";".join([f"{key}={value}" for key, value in attributes_dict.items()]) + attributes_dict["uniprot_prot_name"] = uniprot_descriptions[ + mapping_dict[identifier] + ] + fields[8] = ";".join( + [f"{key}={value}" for key, value in attributes_dict.items()] + ) outfile.write("\t".join(fields) + "\n") @@ -181,11 +205,7 @@ def update_gff_with_mapping(gff_file, mapping_file, output_file, uniprot_descrip species = args.species if " " in species: - sys.exit( - "The species parameter {} is invalid. Spaces are not allowed".format( - species - ) - ) + sys.exit(f"The species parameter {species} is invalid. 
Spaces are not allowed") if args.include_description: if not args.uniprot_tsv: sys.exit("Must provide a UniProt TSV if the --description flag is used") @@ -213,6 +233,8 @@ def update_gff_with_mapping(gff_file, mapping_file, output_file, uniprot_descrip # Update GFF with mapping if args.include_description: - update_gff_with_mapping(gff_file, mapping_file, output_gff_file, uniprot_descriptions) + update_gff_with_mapping( + gff_file, mapping_file, output_gff_file, uniprot_descriptions + ) else: update_gff_with_mapping(gff_file, mapping_file, output_gff_file, dict()) diff --git a/preprocessing/generate_input_file.py b/preprocessing/generate_input_file.py index be1bd828..971d9ef7 100644 --- a/preprocessing/generate_input_file.py +++ b/preprocessing/generate_input_file.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # Copyright 2024 EMBL - European Bioinformatics Institute # @@ -26,21 +25,19 @@ def main(infile, input_dir, bat_dir, outfile, no_prefix): all_prefixes = list() if not os.path.exists(infile): - sys.exit("Input file {} does not exist!".format(infile)) + sys.exit(f"Input file {infile} does not exist!") if not os.path.exists(bat_dir): - sys.exit("BAT directory {} does not exist!".format(bat_dir)) + sys.exit(f"BAT directory {bat_dir} does not exist!") if not os.path.exists(input_dir): - sys.exit("Input directory {} does not exist!".format(input_dir)) + sys.exit(f"Input directory {input_dir} does not exist!") - with open(infile, "r") as f_in, open(outfile, "w") as f_out: + with open(infile) as f_in, open(outfile, "w") as f_out: f_out.write("prefix,assembly,taxid\n") for line in f_in: line = line.strip() if line.endswith(("gz", "zip")): sys.exit( - "Fasta file {} is archived. Please provide only unarchived files.".format( - line - ) + f"Fasta file {line} is archived. Please provide only unarchived files." ) taxid = parse_taxid(line, bat_dir) if not no_prefix: @@ -55,18 +52,18 @@ def main(infile, input_dir, bat_dir, outfile, no_prefix): all_prefixes.append(prefix) full_path = os.path.join(input_dir, line) if no_prefix: - f_out.write(",{},{}\n".format(full_path, taxid)) + f_out.write(f",{full_path},{taxid}\n") else: - f_out.write("{},{},{}\n".format(prefix, full_path, taxid)) - logging.info("Saved results to {}".format(outfile)) + f_out.write(f"{prefix},{full_path},{taxid}\n") + logging.info(f"Saved results to {outfile}") def parse_taxid(fasta_file, bat_dir): bat_filename = os.path.splitext(fasta_file)[0] + ".bin2classification.txt" bat_filepath = os.path.join(bat_dir, bat_filename) if not os.path.exists(bat_filepath): - sys.exit("Bat file {} does not exist!".format(bat_filepath)) - with open(bat_filepath, "r") as f_in: + sys.exit(f"Bat file {bat_filepath} does not exist!") + with open(bat_filepath) as f_in: # skip header f_in.readline() taxid_string = f_in.readline().strip().split("\t")[3] @@ -74,8 +71,8 @@ def parse_taxid(fasta_file, bat_dir): if not taxid: taxid = "FILL IN MANUALY" logging.warning( - "No taxid found for taxid {}. If taxid is known, fill it out manually in the generated " - "output file.".format(taxid) + f"No taxid found for taxid {taxid}. If taxid is known, fill it out manually in the generated " + "output file." ) return taxid @@ -106,7 +103,7 @@ def parse_args(): dest="infile", required=True, help="A file containing a list of genome files to include (file name only, with file extension, unzipped, " - "one file per line). ", + "one file per line). 
", ) parser.add_argument( "-d", diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 56110621..00000000 --- a/pyproject.toml +++ /dev/null @@ -1,15 +0,0 @@ -# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff. -# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. -[tool.ruff] -line-length = 120 -target-version = "py38" -cache-dir = "~/.cache/ruff" - -[tool.ruff.lint] -select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] - -[tool.ruff.lint.isort] -known-first-party = ["nf_core"] - -[tool.ruff.lint.per-file-ignores] -"__init__.py" = ["E402", "F401"] diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..2c0938cc --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,5 @@ +pytest==8.3.3 +pytest-workflow==2.1.0 +pytest-datadir==1.5.0 +black==24.8.0 +ruff==0.6.8 diff --git a/subworkflows/download_databases.nf b/subworkflows/download_databases.nf index f1f1347e..5ffb88d6 100644 --- a/subworkflows/download_databases.nf +++ b/subworkflows/download_databases.nf @@ -12,6 +12,7 @@ include { INTEPROSCAN_GETDB } from '../modules/local/interproscan_getdb' include { INTEPRO_ENTRY_LIST_GETDB } from '../modules/local/interpro_list_getdb' include { RFAM_GETMODELS } from '../modules/local/rfam_getmodels' include { BAKTA_GETDB } from '../modules/local/bakta_getdb' +include { PSEUDOFINDER_GETDB } from '../modules/local/pseudofinder_getdb' workflow DOWNLOAD_DATABASES { @@ -26,6 +27,7 @@ workflow DOWNLOAD_DATABASES { interpro_entry_list = channel.empty() eggnog_db = channel.empty() bakta_db = channel.empty() + pseudofinder_db = channel.empty() amrfinder_plus_dir = file("$params.dbs/amrfinder/") antismash_dir = file("$params.dbs/antismash") @@ -36,6 +38,7 @@ workflow DOWNLOAD_DATABASES { eggnog_data_dir = file("$params.dbs/eggnog") rfam_ncrna_models = file("$params.dbs/rfam_models/rfam_ncrna_cms") bakta_dir = file("$params.dbs/bakta") + pseudofinder_dir = file("$params.dbs/uniprot_sprot") if (amrfinder_plus_dir.exists()) { amrfinder_plus_db = tuple( @@ -126,6 +129,17 @@ workflow DOWNLOAD_DATABASES { ) } + if (pseudofinder_dir.exists()) { + log.info("Pseudofinder database exists, or at least the expected folder.") + pseudofinder_db = tuple( + pseudofinder_dir, + file("${pseudofinder_dir}/VERSION.txt", checkIfExists: true).text // the DB version + ) + } else { + PSEUDOFINDER_GETDB() + pseudofinder_db = PSEUDOFINDER_GETDB.out.pseudofinder_db.first() + } + if (params.bakta) { if (bakta_dir.exists()) { log.info("Bakta database exists, or at least the expected folders.") @@ -148,5 +162,6 @@ workflow DOWNLOAD_DATABASES { eggnog_db = eggnog_db rfam_ncrna_models = rfam_ncrna_models bakta_db = bakta_db + pseudofinder_db = pseudofinder_db } diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/scripts/__init__.py b/tests/scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_inputs/deduplication_script_test_input.gff b/tests/scripts/data/deduplication_script_test_input.gff similarity index 100% rename from tests/test_inputs/deduplication_script_test_input.gff rename to tests/scripts/data/deduplication_script_test_input.gff diff --git a/tests/test_inputs/dummy_reference.gff b/tests/scripts/data/dummy_reference.gff similarity index 100% rename from tests/test_inputs/dummy_reference.gff rename to tests/scripts/data/dummy_reference.gff diff --git 
a/tests/scripts/data/pseudofinder_test_inputs/expected_processed_result.gff b/tests/scripts/data/pseudofinder_test_inputs/expected_processed_result.gff new file mode 100644 index 00000000..7afbee33 --- /dev/null +++ b/tests/scripts/data/pseudofinder_test_inputs/expected_processed_result.gff @@ -0,0 +1,4 @@ +##gff-version 3 +#!annotation-date 2024-12-10 16:01:58 +##sequence-region NZ_LN831051.1 1 2110968 +NZ_LN831051.1 pseudofinder gene 1425 2302 . + . note=Pseudogene candidate. Reason(s):Predicted fragmentation of a single gene.;locus_tag=GCF_001457635.1_1_pseudo_00001;old_locus_tag=GCF_001457635.1_00003,GCF_001457635.1_1_ign_00003,GCF_001457635.1_00004 diff --git a/tests/scripts/data/pseudofinder_test_inputs/prokka_compliant.gff b/tests/scripts/data/pseudofinder_test_inputs/prokka_compliant.gff new file mode 100644 index 00000000..d578e983 --- /dev/null +++ b/tests/scripts/data/pseudofinder_test_inputs/prokka_compliant.gff @@ -0,0 +1,10 @@ +##gff-version 3 +##sequence-region gnl|Prokka|GCF_001457635.1_1 1 2110968 +gnl|Prokka|GCF_001457635.1_1 prokka gene 558 758 . + . ID=GCF_001457635.1_00001_gene;locus_tag=GCF_001457635.1_00001 +gnl|Prokka|GCF_001457635.1_1 Prodigal:002006 CDS 558 758 . + 0 ID=GCF_001457635.1_00001;Parent=GCF_001457635.1_00001_gene;inference=ab initio prediction:Prodigal:002006;locus_tag=GCF_001457635.1_00001;product=hypothetical protein;protein_id=gnl|Prokka|GCF_001457635.1_00001 +gnl|Prokka|GCF_001457635.1_1 prokka gene 991 1263 . + . ID=GCF_001457635.1_00002_gene;locus_tag=GCF_001457635.1_00002 +gnl|Prokka|GCF_001457635.1_1 Prodigal:002006 CDS 991 1263 . + 0 ID=GCF_001457635.1_00002;Parent=GCF_001457635.1_00002_gene;inference=ab initio prediction:Prodigal:002006;locus_tag=GCF_001457635.1_00002;product=hypothetical protein;protein_id=gnl|Prokka|GCF_001457635.1_00002 +gnl|Prokka|GCF_001457635.1_1 prokka gene 1425 1559 . + . ID=GCF_001457635.1_00003_gene;Name=pgk_1;gene=pgk_1;locus_tag=GCF_001457635.1_00003 +gnl|Prokka|GCF_001457635.1_1 Prodigal:002006 CDS 1425 1559 . + 0 ID=GCF_001457635.1_00003;Parent=GCF_001457635.1_00003_gene;eC_number=2.7.2.3;Name=pgk_1;db_xref=COG:COG0126;gene=pgk_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q8DQX8;locus_tag=GCF_001457635.1_00003;product=Phosphoglycerate kinase;protein_id=gnl|Prokka|GCF_001457635.1_00003 +gnl|Prokka|GCF_001457635.1_1 prokka gene 1958 2302 . + . ID=GCF_001457635.1_00005_gene;Name=pgk_2;gene=pgk_2;locus_tag=GCF_001457635.1_00005 +gnl|Prokka|GCF_001457635.1_1 Prodigal:002006 CDS 1958 2302 . + 0 ID=GCF_001457635.1_00005;Parent=GCF_001457635.1_00005_gene;eC_number=2.7.2.3;Name=pgk_2;db_xref=COG:COG0126;gene=pgk_2;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q8DQX8;locus_tag=GCF_001457635.1_00005;product=Phosphoglycerate kinase;protein_id=gnl|Prokka|GCF_001457635.1_00005 diff --git a/tests/scripts/data/pseudofinder_test_inputs/prokka_standard.gff b/tests/scripts/data/pseudofinder_test_inputs/prokka_standard.gff new file mode 100644 index 00000000..f1bc8a25 --- /dev/null +++ b/tests/scripts/data/pseudofinder_test_inputs/prokka_standard.gff @@ -0,0 +1,7 @@ +##gff-version 3 +##sequence-region NZ_LN831051.1 1 2110968 +NZ_LN831051.1 Prodigal:002006 CDS 558 758 . + 0 ID=GCF_001457635.1_00001;inference=ab initio prediction:Prodigal:002006;locus_tag=GCF_001457635.1_00001;product=hypothetical protein +NZ_LN831051.1 Prodigal:002006 CDS 991 1263 . 
+ 0 ID=GCF_001457635.1_00002;inference=ab initio prediction:Prodigal:002006;locus_tag=GCF_001457635.1_00002;product=hypothetical protein +NZ_LN831051.1 Prodigal:002006 CDS 1425 1559 . + 0 ID=GCF_001457635.1_00003;eC_number=2.7.2.3;Name=pgk_1;db_xref=COG:COG0126;gene=pgk_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q8DQX8;locus_tag=GCF_001457635.1_00003;product=Phosphoglycerate kinase +NZ_LN831051.1 Prodigal:002006 CDS 1958 2302 . + 0 ID=GCF_001457635.1_00004;eC_number=2.7.2.3;Name=pgk_2;db_xref=COG:COG0126;gene=pgk_2;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q8DQX8;locus_tag=GCF_001457635.1_00004;product=Phosphoglycerate kinase +NZ_LN831051.1 Prodigal:002006 CDS 2286 2615 . + 0 ID=GCF_001457635.1_00005;eC_number=2.7.2.3;Name=pgk_3;gene=pgk_3;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_00145;locus_tag=GCF_001457635.1_00005;product=Phosphoglycerate kinase diff --git a/tests/scripts/data/pseudofinder_test_inputs/pseudofinder_result.gff b/tests/scripts/data/pseudofinder_test_inputs/pseudofinder_result.gff new file mode 100644 index 00000000..3142d5aa --- /dev/null +++ b/tests/scripts/data/pseudofinder_test_inputs/pseudofinder_result.gff @@ -0,0 +1,4 @@ +##gff-version 3 +#!annotation-date 2024-12-10 16:01:58 +##sequence-region GCF_001457635.1_1 1 2110968 +GCF_001457635.1_1 pseudofinder gene 1425 2302 . + . note=Pseudogene candidate. Reason(s):Predicted fragmentation of a single gene.;locus_tag=GCF_001457635.1_1_pseudo_00001;old_locus_tag=GCF_001457635.1_00003,GCF_001457635.1_1_ign_00003,GCF_001457635.1_00005 diff --git a/tests/scripts/test_add_hypothetical_protein_descriptions.py b/tests/scripts/test_add_hypothetical_protein_descriptions.py index 71d140c4..daad9b74 100644 --- a/tests/scripts/test_add_hypothetical_protein_descriptions.py +++ b/tests/scripts/test_add_hypothetical_protein_descriptions.py @@ -1,254 +1,269 @@ -#!/bin/env python3 +#!/usr/bin/env python3 + +# Copyright 2024 EMBL - European Bioinformatics Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
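+
+# pytest-based unit tests for bin/add_hypothetical_protein_descriptions.py; the
+# shared inputs below are provided via the setup_data fixture instead of
+# unittest's setUp().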
import copy -import os import re -import sys -import unittest -sys.path.append(os.path.sep.join(os.getcwd().split(os.path.sep)[:-2])) +import pytest from bin.add_hypothetical_protein_descriptions import ( - keep_or_move_to_note, - move_function_to_note, + GeneCaller, clean_up_function, - insert_product_source, - get_function, escape_reserved_characters, + get_function, + insert_product_source, + keep_or_move_to_note, + move_function_to_note, ) -class TestHypotheticalProteinAnnotation(unittest.TestCase): - def setUp(self): - self.eggnog_annot = {"my_id": "eggnog function"} - self.attributes_dict = { - "ID": "my_id", - "product": "hypothetical protein", - "protein_id": "protein id", - } - self.ipr_info = { - "my_id": { - "Family": { - "NCBIFam": { - "match": 0.80, - "ipr_desc": "NCBI Fam function", - "sig_desc": "sig desc", - "level": 1, - }, - "Pfam": { - "match": 0.90, - "ipr_desc": "Pfam Fam function", - "sig_desc": "sig desc", - "level": 0, - }, +@pytest.fixture +def setup_data(): + eggnog_annot = {"my_id": "eggnog function"} + attributes_dict = { + "ID": "my_id", + "product": "hypothetical protein", + "protein_id": "protein id", + } + ipr_info = { + "my_id": { + "Family": { + "NCBIFam": { + "match": 0.80, + "ipr_desc": "NCBI Fam function", + "sig_desc": "sig desc", + "level": 1, }, - "Domain": { - "CDD": { - "match": 1.00, - "ipr_desc": "CDD Domain function", - "sig_desc": "sig desc", - "level": 0, - } + "Pfam": { + "match": 0.90, + "ipr_desc": "Pfam Fam function", + "sig_desc": "sig desc", + "level": 0, }, - } + }, + "Domain": { + "CDD": { + "match": 1.00, + "ipr_desc": "CDD Domain function", + "sig_desc": "sig desc", + "level": 0, + } + }, } - self.ipr_memberdb_only = { - "my_id": { - "no_type": { - "NCBIFam": { - "match": 0.80, - "ipr_desc": "NCBI Fam function", - "sig_desc": "sig desc", - "level": 2, - }, - "Pfam": { - "match": 0.90, - "ipr_desc": "Pfam Fam function", - "sig_desc": "sig desc", - "level": 0, - }, + } + ipr_memberdb_only = { + "my_id": { + "no_type": { + "NCBIFam": { + "match": 0.80, + "ipr_desc": "NCBI Fam function", + "sig_desc": "sig desc", + "level": 2, }, - } - } - - def test_get_function_ncbifam_priority(self): - # Tests which function source is best available. In this test, NCBIFam has priority. - result = get_function( - "my_id", - self.attributes_dict, - self.eggnog_annot, - self.ipr_info, - self.ipr_memberdb_only, - ) - self.assertEqual(result, ("NCBI Fam function", "InterPro(NCBIFam)")) - - def test_get_function_level_priority(self): - # Tests which function source is the best one available. In this test, Pfam has priority. - ipr_test = copy.deepcopy(self.ipr_info) - ipr_test["my_id"]["Family"]["Pfam"].update({"level": 1}) - result = get_function( - "my_id", - self.attributes_dict, - self.eggnog_annot, - ipr_test, - self.ipr_memberdb_only, - ) - self.assertEqual(result, ("Pfam Fam function", "InterPro(Pfam)")) - - def test_get_function_level_eggnog(self): - # Tests which function source is the best one available. In this test, eggnog is the only result. - result = get_function( - "my_id", - self.attributes_dict, - self.eggnog_annot, - {}, - {}, - ) - self.assertEqual(result, ("eggnog function", "eggNOG")) - - def test_get_function_level_pfam_overtake(self): - # Tests which function source is the best one available. In this test, Pfam overtakes other annotations - # because it has priority. 
- ipr_test = copy.deepcopy(self.ipr_info) - del ipr_test["my_id"]["Family"]["NCBIFam"] - ipr_test["my_id"]["Family"].update({"CDD": ipr_test["my_id"]["Domain"]["CDD"]}) - result = get_function( - "my_id", - self.attributes_dict, - self.eggnog_annot, - ipr_test, - self.ipr_memberdb_only, - ) - self.assertEqual(result, ("Pfam Fam function", "InterPro(Pfam)")) - - def test_get_function_level_pfam_overtake_rejected(self): - # Tests which function source is the best one available. In this test, Pfam should not take over other - # annotations because it has a higher level. - ipr_test = copy.deepcopy(self.ipr_info) - del ipr_test["my_id"]["Family"]["NCBIFam"] - ipr_test["my_id"]["Family"].update({"CDD": ipr_test["my_id"]["Domain"]["CDD"]}) - ipr_test["my_id"]["Family"]["CDD"].update({"level": 1}) - result = get_function( - "my_id", - self.attributes_dict, - self.eggnog_annot, - ipr_test, - self.ipr_memberdb_only, - ) - self.assertEqual(result, ("CDD Domain function", "InterPro(CDD)")) - - def test_insert_product_source(self): - # Tests adding the product source field to the 9th column, after the product field - product_source = "UniFIRE" - result = insert_product_source(self.attributes_dict, product_source) - self.assertEqual( - result, - { - "ID": "my_id", - "product": "hypothetical protein", - "product_source": "UniFIRE", - "protein_id": "protein id", - }, - ) - - def test_escape_reserved_characters(self): - # Tests replacing special characters - func = "2,3-containing domain, functional protein; integrase" - result = escape_reserved_characters(func) - self.assertEqual( - result, "2%2C3-containing domain/ functional protein/ integrase" - ) - - def test_clean_up_function_domain(self): - # Description ends with "domain", need to add "-containing protein" - func = "ATPase, nucleotide binding domain" - result = clean_up_function(func) - self.assertEqual(result, "ATPase, nucleotide binding domain-containing protein") - - def test_clean_up_function_no_change(self): - # Description contains the word "protein", no need to change - func = "Ribosomal protein L23/L15e core domain superfamily" - result = clean_up_function(func) - self.assertEqual(result, func) - - def test_clean_up_function_add_protein(self): - # Description ends with "family" and doesn't contain the word protein, "protein" should be added - func = "Ribosomal L23/L15e core domain superfamily" - result = clean_up_function(func) - self.assertEqual(result, "Ribosomal L23/L15e core domain superfamily protein") - - def test_keep_or_move_to_note_sentence(self): - # Function is a sentence, should be moved to the note field - func = "This is a protein" - result = keep_or_move_to_note( - func, "eggNOG", {"ID": "123", "locus_tag": "locus_tag"} - ) - self.assertEqual( - result, - ( - "hypothetical protein", - "Prokka", - { - "ID": "123", - "locus_tag": "locus_tag", - "note": "eggNOG:This is a protein", + "Pfam": { + "match": 0.90, + "ipr_desc": "Pfam Fam function", + "sig_desc": "sig desc", + "level": 0, }, - ), - ) - - def test_keep_or_move_to_note_correct(self): - # Function is in the correct format, keep it as is. 
- func = "ABC-hydrolase interfering protein with extra large domain" - result = keep_or_move_to_note( - func, "eggNOG", {"ID": "123", "locus_tag": "locus_tag"} - ) - self.assertEqual( - result, - ( - func, - "eggNOG", - {"ID": "123", "locus_tag": "locus_tag"}, - ), - ) - - def test_move_function_to_note_with_existing_note(self): - # Tests appending function description to the existing text in the note field - col9_with_note = "ID=BU_ATCC8492_03165;locus_tag=BU_ATCC8492_03165;note=UPF0056 inner membrane protein YhgN;product=NAAT family transporter;product_source=NCBIfam;eggNOG=585543.HMPREF0969_01099" - col9_dict = dict( - re.split(r"(?