From a5df18f2759e0778810729c1fabb065263d5a9ef Mon Sep 17 00:00:00 2001 From: pirovc <4673375+pirovc@users.noreply.github.com> Date: Fri, 8 Apr 2022 09:44:24 +0200 Subject: [PATCH] v0.4.1 (#48) * v0.4.1, no default -d, fix and improved debug -Z, fix link gtdb release 07-RS207 * threads on check missing files --- README.md | 15 ++--- genome_updater.sh | 100 +++++++++++++++++++-------------- tests/integration_offline.bats | 61 +++++++++++--------- tests/integration_online.bats | 12 ++-- 4 files changed, 106 insertions(+), 82 deletions(-) diff --git a/README.md b/README.md index e717205..e18c571 100755 --- a/README.md +++ b/README.md @@ -199,10 +199,10 @@ or ┌─┐┌─┐┌┐┌┌─┐┌┬┐┌─┐ ┬ ┬┌─┐┌┬┐┌─┐┌┬┐┌─┐┬─┐ │ ┬├┤ ││││ ││││├┤ │ │├─┘ ││├─┤ │ ├┤ ├┬┘ └─┘└─┘┘└┘└─┘┴ ┴└─┘────└─┘┴ ─┴┘┴ ┴ ┴ └─┘┴└─ - v0.4.0 + v0.4.1 Database options: - -d Database (comma-separated entries) [genbank, refseq] Default: refseq + -d Database (comma-separated entries) [genbank, refseq] Organism options: -g Organism group (comma-separated entries) [archaea, bacteria, fungi, human, invertebrate, metagenomes, other, plant, protozoa, vertebrate_mammalian, vertebrate_other, viral]. Example: archaea,bacteria. @@ -227,9 +227,9 @@ or Default: 0 -F custom filter for the assembly summary in the format colA:val1|colB:valX,valY (case insensitive). Example: -F "2:PRJNA12377,PRJNA670754|14:Partial" for column infos check ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt Default: "" - -D Start date to keep sequences (>=), based on the sequence release date. Format YYYYMMDD. Example: -D 20201030 + -D Start date to keep sequences (>=), based on the sequence release date. Format YYYYMMDD. Example: 20201030 Default: "" - -E End date to keep sequences (<=), based on the sequence release date. Format YYYYMMDD. Example: -D 20201231 + -E End date to keep sequences (<=), based on the sequence release date. Format YYYYMMDD. 
Example: 20201231 Default: "" -z Keep only assemblies present on the latest GTDB release @@ -251,8 +251,8 @@ or Default: "" -k Dry-run, no data is downloaded or updated - just checks for available sequences and changes -i Fix failed downloads or any incomplete data from a previous run, keep current version - -m Check MD5 for downloaded files - -t Threads + -m Check MD5 of downloaded files + -t Threads to parallelize download and some file operations Default: 1 Misc. options: @@ -263,7 +263,8 @@ or -n Conditional exit status. Exit Code = 1 if more than N files failed to download (integer for file number, float for percentage, 0 -> off) Default: 0 -V Verbose log to report successful file downloads - -D Print print debug information and exit + -Z Print debug information and run in debug mode + ## References: diff --git a/genome_updater.sh b/genome_updater.sh index e7931a7..9df41b4 100755 --- a/genome_updater.sh +++ b/genome_updater.sh @@ -25,7 +25,7 @@ IFS=$' ' # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. -version="0.4.0" +version="0.4.1" genome_updater_args=$( printf "%q " "$@" ) export genome_updater_args @@ -44,7 +44,7 @@ use_curl=${use_curl:-0} # Export locale numeric to avoid errors on printf in different setups export LC_NUMERIC="en_US.UTF-8" -gtdb_urls=( "https://data.gtdb.ecogenomic.org/releases/latest/ar122_taxonomy.tsv.gz" +gtdb_urls=( "https://data.gtdb.ecogenomic.org/releases/latest/ar53_taxonomy.tsv.gz" "https://data.gtdb.ecogenomic.org/releases/latest/bac120_taxonomy.tsv.gz" ) #activate aliases in the script @@ -500,7 +500,7 @@ remove_files() # parameter: ${1} file, ${2} fields [assembly_accesion,url] OR fi check_missing_files() # ${1} file, ${2} fields [assembly_accesion,url], ${3} extension - returns assembly accession, url and filename { # Just returns if file doesn't exist or if it's zero size - list_files ${1} ${2} ${3} | xargs --no-run-if-empty -n3 sh -c 'if [ ! 
-s "'"${target_output_prefix}${files_dir}"'${2}" ]; then echo "${0}'$'\t''${1}'$'\t''${2}"; fi' + list_files ${1} ${2} ${3} | xargs -P "${threads}" --no-run-if-empty -n3 sh -c 'if [ ! -s "'"${target_output_prefix}${files_dir}"'${2}" ]; then echo "${0}'$'\t''${1}'$'\t''${2}"; fi' } check_complete_record() # parameters: ${1} file, ${2} field [assembly accession, url], ${3} extension - returns assembly accession, url @@ -575,7 +575,7 @@ print_debug() # parameters: ${1} tools } # Defaults -database="refseq" +database="" organism_group="" species="" taxids="" @@ -624,7 +624,7 @@ function showhelp { print_logo echo echo $'Database options:' - echo $' -d Database (comma-separated entries) [genbank, refseq]\tDefault: refseq' + echo $' -d Database (comma-separated entries) [genbank, refseq]' echo echo $'Organism options:' echo $' -g Organism group (comma-separated entries) [archaea, bacteria, fungi, human, invertebrate, metagenomes, other, plant, protozoa, vertebrate_mammalian, vertebrate_other, viral]. Example: archaea,bacteria.\n\tDefault: ""' @@ -640,8 +640,8 @@ function showhelp { echo $' -P Number of top references for each species nodes to download. 0 for all. Selection order: RefSeq Category, Assembly level, Relation to type material, Date (most recent first)\n\tDefault: 0' echo $' -A Number of top references for each taxids (leaf nodes) to download. 0 for all. Selection order: RefSeq Category, Assembly level, Relation to type material, Date (most recent first)\n\tDefault: 0' echo $' -F custom filter for the assembly summary in the format colA:val1|colB:valX,valY (case insensitive). Example: -F "2:PRJNA12377,PRJNA670754|14:Partial" for column infos check ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt\n\tDefault: ""' - echo $' -D Start date to keep sequences (>=), based on the sequence release date. Format YYYYMMDD. Example: -D 20201030\n\tDefault: ""' - echo $' -E End date to keep sequences (<=), based on the sequence release date. Format YYYYMMDD. 
Example: -D 20201231\n\tDefault: ""' + echo $' -D Start date to keep sequences (>=), based on the sequence release date. Format YYYYMMDD. Example: 20201030\n\tDefault: ""' + echo $' -E End date to keep sequences (<=), based on the sequence release date. Format YYYYMMDD. Example: 20201231\n\tDefault: ""' echo $' -z Keep only assemblies present on the latest GTDB release' echo echo $'Report options:' @@ -657,8 +657,8 @@ function showhelp { echo $' -B Base label to use as the current version. Can be used to rollback to an older version or to create multiple branches from a base version. It only applies for updates. \n\tDefault: ""' echo $' -k Dry-run, no data is downloaded or updated - just checks for available sequences and changes' echo $' -i Fix failed downloads or any incomplete data from a previous run, keep current version' - echo $' -m Check MD5 for downloaded files' - echo $' -t Threads\n\tDefault: 1' + echo $' -m Check MD5 of downloaded files' + echo $' -t Threads to parallelize download and some file operations\n\tDefault: 1' echo echo $'Misc. options:' echo $' -x Allow the deletion of regular extra files if any found in the files folder. Symbolic links that do not belong to the current version will always be deleted.' @@ -667,7 +667,7 @@ function showhelp { echo $' -w Silent output with download progress (%) and download version at the end' echo $' -n Conditional exit status. 
Exit Code = 1 if more than N files failed to download (integer for file number, float for percentage, 0 -> off)\n\tDefault: 0' echo $' -V Verbose log to report successful file downloads' - echo $' -D Print print debug information and exit' + echo $' -Z Print debug information and run in debug mode' echo } @@ -690,52 +690,63 @@ done if [ "${tool_not_found}" -eq 1 ]; then exit 1; fi OPTIND=1 # Reset getopts -while getopts "d:g:S:T:c:l:F:o:e:R:b:B:t:f:P:A:D:E:zn:akixmurpswhDV" opt; do +while getopts "aA:b:B:c:d:D:e:E:f:F:g:hikl:mn:o:pP:rR:sS:t:T:uVwxzZ" opt; do case ${opt} in - d) database=${OPTARG} ;; - g) organism_group=${OPTARG// } ;; #remove spaces - S) species=${OPTARG// } ;; #remove spaces - T) taxids=${OPTARG// } ;; #remove spaces - c) refseq_category=${OPTARG} ;; - l) assembly_level=${OPTARG} ;; - F) custom_filter=${OPTARG} ;; - o) working_dir=${OPTARG} ;; - e) external_assembly_summary=${OPTARG} ;; - R) retry_download_batch=${OPTARG} ;; + a) download_taxonomy=1 ;; + A) top_assemblies_taxids=${OPTARG} ;; b) label=${OPTARG} ;; B) rollback_label=${OPTARG} ;; - t) threads=${OPTARG} ;; - f) file_formats=${OPTARG// } ;; #remove spaces - P) top_assemblies_species=${OPTARG} ;; - A) top_assemblies_taxids=${OPTARG} ;; + c) refseq_category=${OPTARG} ;; + d) database=${OPTARG} ;; D) date_start=${OPTARG} ;; + e) external_assembly_summary=${OPTARG} ;; E) date_end=${OPTARG} ;; - z) gtdb_only=1 ;; - a) download_taxonomy=1 ;; - k) dry_run=1 ;; + f) file_formats=${OPTARG// } ;; #remove spaces + F) custom_filter=${OPTARG} ;; + g) organism_group=${OPTARG// } ;; #remove spaces + h|\?) 
showhelp; exit 0 ;; i) just_fix=1 ;; - x) delete_extra_files=1 ;; + k) dry_run=1 ;; + l) assembly_level=${OPTARG} ;; m) check_md5=1 ;; - u) updated_assembly_accession=1 ;; - r) updated_sequence_accession=1 ;; - p) url_list=1 ;; n) conditional_exit=${OPTARG} ;; + o) working_dir=${OPTARG} ;; + p) url_list=1 ;; + P) top_assemblies_species=${OPTARG} ;; + r) updated_sequence_accession=1 ;; + R) retry_download_batch=${OPTARG} ;; s) silent=1 ;; - w) silent_progress=1 ;; - D) debug_mode=1 ;; + S) species=${OPTARG// } ;; #remove spaces + t) threads=${OPTARG} ;; + T) taxids=${OPTARG// } ;; #remove spaces + u) updated_assembly_accession=1 ;; V) verbose_log=1 ;; - h|\?) showhelp; exit 0 ;; + w) silent_progress=1 ;; + x) delete_extra_files=1 ;; + z) gtdb_only=1 ;; + Z) debug_mode=1 ;; :) echo "Option -${OPTARG} requires an argument." >&2; exit 1 ;; esac done -if [ ${OPTIND} -eq 1 ]; then showhelp; exit 1; fi -shift $((OPTIND-1)) -[ "${1:-}" = "--" ] && shift +# Print tools and versions if [ "${debug_mode}" -eq 1 ] ; then print_debug tools; - exit 0; + # If debug is the only parameter, exit, otherwise set debug mode for the run (set -x) + if [ ${OPTIND} -eq 2 ]; then + exit 0; + else + set -x + fi +fi +# No params +if [ ${OPTIND} -eq 1 ]; then + showhelp; + exit 1; fi +shift $((OPTIND-1)) +[ "${1:-}" = "--" ] && shift + ######################### General parameter validation ######################### if [[ -z "${database}" ]]; then echo "Database is required (-d)"; exit 1; @@ -842,7 +853,6 @@ if [[ "${MODE}" == "UPDATE" ]] || [[ "${MODE}" == "FIX" ]]; then # get existing if [[ -f "${rollback_assembly_summary}" ]]; then rm ${default_assembly_summary} ln -s -r "${rollback_assembly_summary}" "${default_assembly_summary}" - else echo "Rollback label/assembly_summary.txt not found ["${rollback_assembly_summary}"]"; exit 1 fi @@ -928,6 +938,10 @@ else fi echolog "-------------------------------------------" "1" +if [ "${debug_mode}" -eq 1 ] ; then + ls -laR "${working_dir}" +fi + # new 
if [[ "${MODE}" == "NEW" ]]; then @@ -983,7 +997,6 @@ if [[ "${MODE}" == "NEW" ]]; then fi echolog "" "1" fi - fi else # update/fix @@ -1176,6 +1189,11 @@ if [ "${dry_run}" -eq 0 ]; then if [ "${silent_progress}" -eq 1 ] ; then echo "$(dirname $(readlink -m ${default_assembly_summary}))" fi + + if [ "${debug_mode}" -eq 1 ] ; then + ls -laR "${working_dir}" + fi + # Exit conditional status exit $(exit_status ${expected_files} ${current_files}) fi diff --git a/tests/integration_offline.bats b/tests/integration_offline.bats index 6d6f7ca..e7b9e7a 100644 --- a/tests/integration_offline.bats +++ b/tests/integration_offline.bats @@ -25,6 +25,11 @@ setup_file() { assert_success } +@test "Run genome_updater.sh and show debug info" { + run ./genome_updater.sh -Z + assert_success +} + @test "DB refseq" { outdir=${outprefix}db-refseq/ label="test" @@ -59,14 +64,14 @@ setup_file() { @test "Organism group archaea" { outdir=${outprefix}og-archaea/ label="test" - run ./genome_updater.sh -o archaea -b ${label} -o ${outdir} + run ./genome_updater.sh -d refseq -g archaea -b ${label} -o ${outdir} sanity_check ${outdir} ${label} } @test "Organism group archaea and fungi" { outdir=${outprefix}og-archaea-fungi/ label="test" - run ./genome_updater.sh -o archaea,fungi -b ${label} -o ${outdir} + run ./genome_updater.sh -d refseq -g archaea,fungi -b ${label} -o ${outdir} sanity_check ${outdir} ${label} } @@ -78,7 +83,7 @@ setup_file() { #echo ${txids[@]} >&3 # Use third - run ./genome_updater.sh -S "${txids[2]}" -b ${label} -o ${outdir} + run ./genome_updater.sh -d refseq -S "${txids[2]}" -b ${label} -o ${outdir} sanity_check ${outdir} ${label} # Check if output contains only used taxids @@ -98,7 +103,7 @@ setup_file() { #echo ${rscat[@]} >&3 # Use first - run ./genome_updater.sh -c "${rscat[0]}" -b ${label} -o ${outdir} + run ./genome_updater.sh -d refseq -c "${rscat[0]}" -b ${label} -o ${outdir} sanity_check ${outdir} ${label} # Check if output contains only selected refseq category @@ 
-119,7 +124,7 @@ setup_file() { #echo ${aslev[@]} >&3 # Use first - run ./genome_updater.sh -l "${aslev[0]}" -b ${label} -o ${outdir} + run ./genome_updater.sh -d refseq -l "${aslev[0]}" -b ${label} -o ${outdir} sanity_check ${outdir} ${label} # Check if output contains only selected assembly level @@ -141,7 +146,7 @@ setup_file() { aslev=( $(get_values_as ${local_dir}genomes/refseq/assembly_summary_refseq.txt 12 ) ) # Simulate refseq category and assembly level filter using the custom filter - run ./genome_updater.sh -F "5:${rscat[0]}|12:${aslev[0]}" -b ${label} -o ${outdir} + run ./genome_updater.sh -d refseq -F "5:${rscat[0]}|12:${aslev[0]}" -b ${label} -o ${outdir} sanity_check ${outdir} ${label} # Check if output contains only selected refseq category @@ -253,7 +258,7 @@ setup_file() { @test "Report assembly accession" { outdir=${outprefix}report-assembly-accession/ label="test" - run ./genome_updater.sh -b ${label} -o ${outdir} -u + run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} -u sanity_check ${outdir} ${label} # Check if report was printed and has all lines reported @@ -265,7 +270,7 @@ setup_file() { @test "Report sequence accession" { outdir=${outprefix}report-sequence-accession/ label="test" - run ./genome_updater.sh -b ${label} -o ${outdir} -r + run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} -r sanity_check ${outdir} ${label} # Check if report was printed @@ -276,7 +281,7 @@ setup_file() { @test "Report urls" { outdir=${outprefix}report-urls/ label="test" - run ./genome_updater.sh -b ${label} -o ${outdir} -p + run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} -p sanity_check ${outdir} ${label} # Check if report was printed and has all lines reported @@ -292,7 +297,7 @@ setup_file() { outdir=${outprefix}external-assembly-summary/ label="test" # Get assembly_summary from -e (not directly from url) - run ./genome_updater.sh -b ${label} -o ${outdir} -e ${local_dir}genomes/refseq/assembly_summary_refseq.txt + run 
./genome_updater.sh -d refseq -b ${label} -o ${outdir} -e ${local_dir}genomes/refseq/assembly_summary_refseq.txt sanity_check ${outdir} ${label} } @@ -302,17 +307,17 @@ setup_file() { # Base version with only refseq label1="v1" - run ./genome_updater.sh -b ${label1} -o ${outdir} -d refseq + run ./genome_updater.sh -d refseq -b ${label1} -o ${outdir} sanity_check ${outdir} ${label1} # Second version with more entries (refseq,genbank) label2="v2" - run ./genome_updater.sh -b ${label2} -o ${outdir} -d refseq,genbank + run ./genome_updater.sh -d refseq,genbank -b ${label2} -o ${outdir} sanity_check ${outdir} ${label2} # Third version with same entries (nothing to download) label3="v3" - run ./genome_updater.sh -b ${label3} -o ${outdir} -d refseq,genbank + run ./genome_updater.sh -d refseq,genbank -b ${label3} -o ${outdir} sanity_check ${outdir} ${label3} # Check log for no updates @@ -321,7 +326,7 @@ setup_file() { # Fourth version with the same as second but rolling back from first, re-download files label4="v4" - run ./genome_updater.sh -b ${label4} -o ${outdir} -d refseq,genbank -B v1 + run ./genome_updater.sh -d refseq,genbank -b ${label4} -o ${outdir} -B v1 sanity_check ${outdir} ${label4} # Check log for updates @@ -332,13 +337,13 @@ setup_file() { @test "Delete extra files" { outdir=${outprefix}delete-extra-files/ label="test" - run ./genome_updater.sh -b ${label} -o ${outdir} + run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} sanity_check ${outdir} ${label} # Create extra files touch "${outdir}${label}/files/EXTRA_FILE.txt" assert_file_exist "${outdir}${label}/files/EXTRA_FILE.txt" # Run to fix and delete - run ./genome_updater.sh -b ${label} -o ${outdir} -i -x + run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} -i -x sanity_check ${outdir} ${label} # File was removed assert_not_exist "${outdir}${label}/files/EXTRA_FILE.txt" @@ -350,7 +355,7 @@ setup_file() { # update label label="update" # Update 
(should not not carry extra file over to new version) - run ./genome_updater.sh -b ${label} -o ${outdir} + run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} sanity_check ${outdir} ${label} assert_not_exist "${outdir}${label}/files/ANOTHER_EXTRA_FILE.txt" @@ -360,14 +365,14 @@ setup_file() { @test "Threads" { outdir=${outprefix}threads/ label="test" - run ./genome_updater.sh -b ${label} -o ${outdir} -t 8 + run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} -t 8 sanity_check ${outdir} ${label} } @test "Silent" { outdir=${outprefix}silent/ label="test" - run ./genome_updater.sh -b ${label} -o ${outdir} -s + run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} -s sanity_check ${outdir} ${label} # check if printed to STDOUT @@ -379,7 +384,7 @@ setup_file() { label="test" use_curl=1 export use_curl - run ./genome_updater.sh -b ${label} -o ${outdir} + run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} sanity_check ${outdir} ${label} } @@ -388,24 +393,24 @@ setup_file() { label="test" # Dry-run NEW - run ./genome_updater.sh -b ${label} -o ${outdir} -k + run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} -k assert_success assert_dir_not_exist ${outdir} # Real run NEW - run ./genome_updater.sh -b ${label} -o ${outdir} + run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} sanity_check ${outdir} ${label} # Remove files to simulate failure rm ${outdir}${label}/files/* # Dry-run FIX - run ./genome_updater.sh -b ${label} -o ${outdir} -k -i + run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} -k -i assert_success assert_file_empty {outdir}${label}/files/ # Real run FIX - run ./genome_updater.sh -b ${label} -o ${outdir} -i + run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} -i sanity_check ${outdir} ${label} } @@ -414,20 +419,20 @@ setup_file() { label="test" # Dry-run NEW - run ./genome_updater.sh -b ${label} -o ${outdir} -k + run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} -k assert_success 
assert_dir_not_exist ${outdir} # Real run NEW - run ./genome_updater.sh -b ${label} -o ${outdir} + run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} sanity_check ${outdir} ${label} # Dry-run UPDATE (use another organism group to simulate change) label="update" - run ./genome_updater.sh -g archaea,fungi -b ${label} -o ${outdir} -k + run ./genome_updater.sh -d refseq -g archaea,fungi -b ${label} -o ${outdir} -k assert_success # Real run FIX - run ./genome_updater.sh -g archaea,fungi -b ${label} -o ${outdir} + run ./genome_updater.sh -d refseq -g archaea,fungi -b ${label} -o ${outdir} sanity_check ${outdir} ${label} } diff --git a/tests/integration_online.bats b/tests/integration_online.bats index 143d53c..e85df3f 100644 --- a/tests/integration_online.bats +++ b/tests/integration_online.bats @@ -28,7 +28,7 @@ setup_file() { label="test" # Protozoa in refseq is the smallest available assembly_summary at the time of writing this test (01.2022) - run ./genome_updater.sh -g protozoa -d refseq -b ${label} -t ${threads} -o ${outdir} + run ./genome_updater.sh -d refseq -g protozoa -b ${label} -t ${threads} -o ${outdir} sanity_check ${outdir} ${label} # Check filenames @@ -40,14 +40,14 @@ setup_file() { @test "NA URL" { outdir=${outprefix}na-url/ label="test" - run ./genome_updater.sh -b ${label} -o ${outdir} -t ${threads} -e ${files_dir}simulated/assembly_summary_na_url.txt + run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} -t ${threads} -e ${files_dir}simulated/assembly_summary_na_url.txt sanity_check ${outdir} ${label} } @test "All invalid URLs" { outdir=${outprefix}all-invalid-url/ label="test" - run ./genome_updater.sh -b ${label} -o ${outdir} -t ${threads} -e ${files_dir}simulated/assembly_summary_all_invalid_url.txt + run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} -t ${threads} -e ${files_dir}simulated/assembly_summary_all_invalid_url.txt assert_success assert_equal $(count_files ${outdir} ${label}) 0 } @@ -55,7 +55,7 @@ setup_file() { 
@test "Some invalid URLs" { outdir=${outprefix}some-invalid-url/ label="test" - run ./genome_updater.sh -b ${label} -o ${outdir} -t ${threads} -e ${files_dir}simulated/assembly_summary_some_invalid_url.txt + run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} -t ${threads} -e ${files_dir}simulated/assembly_summary_some_invalid_url.txt assert_success assert_equal $(count_files ${outdir} ${label}) 2 } @@ -77,7 +77,7 @@ setup_file() { label="test" # 5690 Trypanosoma genus - around 6 genomes, get only one per species (01.2022) - run ./genome_updater.sh -g protozoa -T 5690 -P 1 -b ${label} -o ${outdir} -t ${threads} + run ./genome_updater.sh -d refseq -g protozoa -T 5690 -P 1 -b ${label} -o ${outdir} -t ${threads} sanity_check ${outdir} ${label} # Get counts of species taxids on output @@ -108,7 +108,7 @@ setup_file() { label="test" # 5693 Trypanosoma cruzi - run ./genome_updater.sh -e ${files_dir}simulated/assembly_summary_gtdb.txt -b ${label} -o ${outdir} -t ${threads} -z + run ./genome_updater.sh -d refseq -e ${files_dir}simulated/assembly_summary_gtdb.txt -b ${label} -o ${outdir} -t ${threads} -z sanity_check ${outdir} ${label} # 1 out of 2 available on GTDB