-
Notifications
You must be signed in to change notification settings - Fork 5
/
config.txt
99 lines (73 loc) · 3.46 KB
/
config.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
######################################################
### Configuration File for all HybPhaser R scripts ###
######################################################
# General settings
# These keys sit outside the numbered parts below, so they are presumably
# shared by all scripts -- confirm against the scripts that read this file.
# Empty strings are placeholders as shipped.
path_to_output_folder = ""
fasta_file_with_targets = ""
# Format of the targets file named above; accepted values are listed in the
# trailing comment.
targets_file_format = "DNA" # "DNA" or "AA"
path_to_namelist = ""
# Flag stored as the string "no", not as a boolean; presumably the other
# accepted value is "yes" -- confirm.
intronerated_contig = "no"
###############################
### Part 1: SNP Assessment ###
###############################
# Settings for the SNP assessment step, including dataset optimization.
# Name for the optimized subset; empty as shipped. Presumably used to label
# the output of this step -- confirm against the consuming script.
name_for_dataset_optimization_subset = ""
# Missing data
# Four removal thresholds: two applied to samples, two applied to loci.
# The key names describe proportions, so values are presumably expected in
# the range 0-1, and the shipped value 0 presumably removes nothing -- confirm.
# NOTE: "propotion" (sic) is part of the key names. Do not correct the
# spelling here unless every script that reads these keys is changed too.
remove_samples_with_less_than_this_propotion_of_loci_recovered = 0
remove_samples_with_less_than_this_propotion_of_target_sequence_length_recovered = 0
remove_loci_with_less_than_this_propotion_of_samples_recovered = 0
remove_loci_with_less_than_this_propotion_of_target_sequence_length_recovered = 0
# Paralogs
# Removal of loci across all samples based on their mean proportion of SNPs;
# accepted values are listed in the trailing comment.
remove_loci_for_all_samples_with_more_than_this_mean_proportion_of_SNPs = "none" # any number between 0 and 1, "none" or "outliers"
# File naming putative paralogs to remove for all samples; empty as shipped.
file_with_putative_paralogs_to_remove_for_all_samples = ""
# Flag stored as the string "no"; presumably "yes" turns on per-sample
# outlier removal -- confirm.
remove_outlier_loci_for_each_sample = "no"
##################################
### Part 2: Clade Association ###
##################################
# set variables to determine thresholds for the dataset optimization and run the script from the main script
# NOTE(review): no threshold keys appear in this section; the sentence above
# may have been carried over from Part 1 -- confirm.
# Empty strings are placeholders as shipped.
path_to_clade_association_folder = ""
csv_file_with_clade_reference_names = ""
path_to_reference_sequences = ""
path_to_read_files_cladeassociation = ""
read_type_cladeassociation = ""
# WARNING: ID_read_pair1 and ID_read_pair2 are assigned again in Part 3 of
# this file (there as "_R1.fastq.gz" / "_R2.fastq.gz"), and
# file_with_samples_included is assigned again in Part 4. If the file is
# evaluated top to bottom, the later assignment replaces the value set here;
# a strict TOML parser would reject the repeated keys.
# NOTE(review): confirm how the consuming scripts load this file.
ID_read_pair1 = ""
ID_read_pair2 = ""
file_with_samples_included = ""
path_to_bbmap = ""
# Thread count is given as the string "auto" as shipped.
no_of_threads_clade_association = "auto"
# Flag stored as the string "no", not as a boolean.
run_clade_association_mapping_in_R = "no"
# Java memory setting; empty as shipped. Set it only when a java -Xmx error
# comes up, and keep it at or below 85% of physical memory.
java_memory_usage_clade_association = "" # e.g. 2G (for 2GB) needed when java -Xmx error comes up. should be max. 85% of physical memory
#######################
### Part 3: Phasing ###
#######################
# Set the variables below and point the scripts to the right folders and files
# before running the individual phasing scripts.
# Empty strings are placeholders as shipped.
path_to_phasing_folder = ""
csv_file_with_phasing_prep_info = ""
path_to_read_files_phasing = ""
read_type_4phasing = "paired-end"
# WARNING: ID_read_pair1 and ID_read_pair2 are also assigned in Part 2 of this
# file (there as empty strings). If the file is evaluated top to bottom, the
# values below replace the Part 2 values; a strict TOML parser would reject
# the repeated keys.
# NOTE(review): confirm how the consuming scripts load this file.
ID_read_pair1 = "_R1.fastq.gz" # e.g. "_R1.fastq.gz"
ID_read_pair2 = "_R2.fastq.gz" # e.g. "_R2.fastq.gz"
reference_sequence_folder = ""
folder_for_phased_reads = ""
folder_for_phasing_stats = ""
path_to_bbmap_executables = ""
# Thread count is given as the string "auto" as shipped.
no_of_threads_phasing = "auto"
# Java memory setting; empty as shipped. Set it only when a java -Xmx error
# comes up, and keep it at or below 85% of physical memory.
java_memory_usage_phasing = "" # e.g. 2G (for 2GB) needed when java -Xmx error comes up. should be max. 85% of physical memory
# Flag stored as the string "no", not as a boolean.
run_bash_script_in_R = "no"
################################
### Part 4: Merging Datasets ###
################################
# This part generates sequence lists that combine the phased sequences with the
# normal (non-phased) ones, excluding the normal sequences of the phased accessions.
# Subsets of samples or loci can also be made using lists of included or
# excluded samples/loci.
# Empty strings are placeholders as shipped.
path_to_sequence_lists_normal = ""
path_to_sequence_lists_phased = ""
path_of_sequence_lists_output = ""
path_to_namelist_normal = ""
path_to_namelist_phased = ""
# To generate subsets, one can choose to define samples that are either included or excluded.
# WARNING: file_with_samples_included is also assigned in Part 2 of this file.
# If the file is evaluated top to bottom, the value below replaces the Part 2
# value; a strict TOML parser would reject the repeated key.
# NOTE(review): confirm how the consuming scripts load this file.
file_with_samples_included = ""
file_with_samples_excluded = ""
# Similarly, one can choose to exclude loci or use a list with only the included loci.
file_with_loci_excluded = ""
file_with_loci_included = ""
# The two flags below are stored as the strings "yes"/"no", not as booleans.
exchange_phased_with_not_phased_samples = "yes" # "yes" or "no"
include_phased_seqlists_when_non_phased_locus_absent = "no" # "yes" or "no"