
Commit

changed from bidscoin to dcm2niix + in-house sorting; set two_pass processing as the default; documentation updates; updated tests
astewartau committed Dec 17, 2021
1 parent d5563b1 commit 29dfd25
Showing 6 changed files with 347 additions and 158 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -49,9 +49,9 @@ docker run -it -v ~/neurodesktop-storage:/neurodesktop-storage vnmd/qsmxt_1.1.8:
1. Convert DICOM data to BIDS:
```bash
python3 /opt/QSMxT/run_0_dicomSort.py REPLACE_WITH_YOUR_DICOM_INPUT_DATA_DIRECTORY 00_dicom
python3 /opt/QSMxT/run_1_dicomToBids.py 00_dicom 01_bids
python3 /opt/QSMxT/run_1_dicomConvert.py 00_dicom 01_bids
```
After this step check if the data were correctly recognized and converted to BIDS. Otherwise make a copy of /opt/QSMxT/bidsmap.yaml - adjust based on provenance example in 01_bids/code/bidscoin/bidsmap.yaml (see for example what it detected under extra_files) - and run again with the parameter `--heuristic bidsmap.yaml`. If the data were acquired on a GE scanner the complex data needs to be corrected by applying an FFT shift, this can be done with `python /opt/QSMxT/run_1_fixGEphaseFFTshift.py 01_bids/sub*/ses*/anat/*_run-1_*.nii.gz` .
Carefully read the output of the `run_1_dicomConvert.py` script to ensure the data were correctly recognized and converted. If the data were acquired on a GE scanner, the complex data needs to be corrected by applying an FFT shift; this can be done with `python /opt/QSMxT/run_1_fixGEphaseFFTshift.py 01_bids/sub*/ses*/anat/*.nii*`.
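If the default series patterns do not match your protocol's series descriptions, the series used for QSM and segmentation can be selected explicitly via the script's pattern options (a minimal sketch; the pattern values below are hypothetical examples and should be replaced with your own series descriptions):
```bash
python3 /opt/QSMxT/run_1_dicomConvert.py 00_dicom 01_bids \
    --t2starw_series_patterns '*qsm*' '*t2starw*' \
    --t1w_series_patterns '*mp2rage*' '*t1w*'
```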

2. Run QSM pipeline:
```bash
22 changes: 10 additions & 12 deletions run_0_dicomSort.py
@@ -5,9 +5,12 @@
# https://gist.github.com/alex-weston-13/4dae048b423f1b4cb9828734a4ec8b83
import argparse
import os
import sys
import pydicom # pydicom is using the gdcm package for decompression
import shutil
import numpy
import subprocess
import glob
import json
import fnmatch

def empty_dirs(root_dir='.', recursive=True):
    empty_dirs = []
@@ -54,14 +57,14 @@ def dicomsort(input_dir, output_dir, use_patient_names, use_session_dates, delet
            elif file[:2] == 'IM':
                extension = '.dcm'
                unsortedList.append(os.path.join(root, file))

    print(f'{len(unsortedList)} files found.')
    print(f'{len(unsortedList)} dicom files found.')

    fail = False

    subjName_dates = []
    subjName_sessionNums = {}

    print(f'sorting dicoms in {output_dir}...')
    for dicom_loc in unsortedList:
        # read the file
        ds = pydicom.read_file(dicom_loc, force=True)
@@ -123,11 +126,8 @@ def dicomsort(input_dir, output_dir, use_patient_names, use_session_dates, delet
        for dicom_loc in unsortedList:
            os.remove(dicom_loc)

    #for folder in find_empty_dirs(input_dir):
        #print(folder)
        #shutil.rmtree(folder)
    print('done sorting dicoms.')

    print('done.')

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
@@ -142,8 +142,7 @@ def dicomsort(input_dir, output_dir, use_patient_names, use_session_dates, delet

    parser.add_argument(
        'output_dir',
        default=None,
        help='output directory for sorted DICOMs; by default this is the same as input_dir'
        help='output directory for sorted DICOMs'
    )

    parser.add_argument(
@@ -169,10 +168,9 @@ def dicomsort(input_dir, output_dir, use_patient_names, use_session_dates, delet

    dicomsort(
        input_dir=args.input_dir,
        output_dir=args.output_dir if args.output_dir is not None else args.input_dir,
        output_dir=args.output_dir,
        use_patient_names=args.use_patient_names,
        use_session_dates=args.use_session_dates,
        delete_originals=args.input_dir == args.output_dir or args.delete_originals
    )


273 changes: 273 additions & 0 deletions run_1_dicomConvert.py
@@ -0,0 +1,273 @@
#!/usr/bin/env python3
import argparse
import os
import sys
import subprocess
import glob
import json
import fnmatch

def load_json(path):
    f = open(path)
    j = json.load(f)
    f.close()
    return j

def rename(old, new, always_show=False):
    if always_show or not sys.__stdin__.isatty():
        print(f'Renaming {old} -> {new}')
    if not os.path.exists(os.path.split(new)[0]):
        os.makedirs(os.path.split(new)[0], exist_ok=True)
    os.rename(old, new)

def clean(data):
    return data.replace('_', '')

def convert_to_nifti(input_dir, output_dir, t2starw_series_patterns, t1w_series_patterns, auto_yes):
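    """Convert sorted DICOMs ({subject}/{session}/{series}) to NIfTI using dcm2niix and
    rename the outputs to BIDS-style filenames based on the given series description patterns."""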
    print('Converting all DICOMs to nifti...')
    subjects = os.listdir(input_dir)
    for subject in subjects:
        sessions = os.listdir(os.path.join(input_dir, subject))
        for session in sessions:
            session_extra_folder = os.path.join(output_dir, clean(subject), session, "extra_data")
            os.makedirs(session_extra_folder, exist_ok=True)
            if 'dcm2niix_output.txt' in os.listdir(session_extra_folder):
                print(f'Warning: {session_extra_folder} already has dcm2niix conversion output! Skipping...')
                continue
            series = os.listdir(os.path.join(input_dir, subject, session))
            for s in series:
                series_dicom_folder = os.path.join(input_dir, subject, session, s)
                print(f"dcm2niix -z n -o {session_extra_folder} {series_dicom_folder}")
                subprocess.call(f"dcm2niix -z n -o {session_extra_folder} {series_dicom_folder} >> {os.path.join(session_extra_folder, 'dcm2niix_output.txt')}", executable='/bin/bash', shell=True)

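    # dcm2niix writes NIfTI images and JSON sidecars into each extra_data folder;
    # the sidecar headers are used below to identify, classify and rename the series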
    print(f"Enumerating series names from JSON headers in '{output_dir}/.../extra_data' folders...")
    all_series_names = []
    subjects = os.listdir(output_dir)
    json_files = []
    json_datas = []
    for subject in subjects:
        sessions = os.listdir(os.path.join(output_dir, subject))
        for session in sessions:
            session_extra_folder = os.path.join(output_dir, subject, session, "extra_data")
            json_files.extend(sorted(glob.glob(os.path.join(session_extra_folder, "*json"))))
            json_datas.extend([load_json(json_file) for json_file in sorted(glob.glob(os.path.join(session_extra_folder, "*json")))])
    all_series_names = sorted(list(set([
        json_datas[i]['SeriesDescription']
        for i in range(len(json_datas))
        if json_datas[i]["Modality"] == "MR"
    ])))
    if not all_series_names:
        print(f"No valid series found in JSON headers in '{output_dir}/.../extra_data' folders!")
        exit(1)
    print(f"All series names identified: {all_series_names}")

    # identify series using patterns
    t2starw_series_names = []
    for t2starw_series_pattern in t2starw_series_patterns:
        for series_name in all_series_names:
            if fnmatch.fnmatch(series_name, t2starw_series_pattern):
                t2starw_series_names.append(series_name)
    t1w_series_names = []
    for t1w_series_pattern in t1w_series_patterns:
        for series_name in all_series_names:
            if fnmatch.fnmatch(series_name, t1w_series_pattern):
                t1w_series_names.append(series_name)
    if t2starw_series_names:
        print(f"Chosen t2starw patterns {t2starw_series_patterns} matched with the following series: {t2starw_series_names}")
    if t1w_series_names:
        print(f"Chosen t1w patterns {t1w_series_patterns} matched with the following series: {t1w_series_names}")

    if not t2starw_series_names and (sys.__stdin__.isatty() and not auto_yes): # if running interactively
        print(f"No t2starw series found matching patterns: {t2starw_series_patterns}")
        for i in range(len(all_series_names)):
            print(f"{i+1}. {all_series_names[i]}")
        while True:
            user_input = input("Identify T2Starw scans for QSM (comma-separated numbers): ")
            t2starw_scans_idx = user_input.split(",")
            try:
                t2starw_scans_idx = [int(j)-1 for j in t2starw_scans_idx]
            except:
                print("Invalid input")
                continue
            t2starw_scans_idx = sorted(list(set(t2starw_scans_idx)))
            try:
                t2starw_series_names = [all_series_names[j] for j in t2starw_scans_idx]
                break
            except:
                print("Invalid input")
    if t2starw_series_names:
        print(f"Identified matching t2starw series: {t2starw_series_names}")
    elif not t2starw_series_names:
        print("Error: No t2starw series found!")
        exit(1)

    # identify T1w series
    if not t1w_series_names and (sys.__stdin__.isatty() and not auto_yes):
        print(f"No t1w series found matching patterns: {t1w_series_patterns}")
        for i in range(len(all_series_names)):
            print(f"{i+1}. {all_series_names[i]}")
        while True:
            user_input = input("Identify t1w scans for automated segmentation (comma-separated numbers; enter nothing to ignore): ").strip()
            if user_input == "":
                break
            t1w_scans_idx = user_input.split(",")
            try:
                t1w_scans_idx = sorted(list(set([int(j)-1 for j in t1w_scans_idx])))
            except:
                print("Invalid input")
                continue
            try:
                t1w_series_names = [all_series_names[j] for j in t1w_scans_idx]
                break
            except:
                print("Invalid input")
    if t1w_series_names:
        print(f"Identified matching t1w series: {t1w_series_names}")
    if not t1w_series_names:
        print(f"Warning: No t1w series found matching patterns {t1w_series_patterns}! Automated segmentation will not be possible.")

    print('Parsing JSON headers...')
    all_session_details = []
    for subject in subjects:
        sessions = os.listdir(os.path.join(output_dir, subject))
        for session in sessions:
            session_extra_folder = os.path.join(output_dir, subject, session, "extra_data")
            session_anat_folder = os.path.join(output_dir, subject, session, "anat")
            json_files = sorted(glob.glob(os.path.join(session_extra_folder, "*json")))
            session_details = []
            for json_file in json_files:
                json_data = load_json(json_file)
                if json_data['Modality'] == 'MR' and json_data['SeriesDescription'] in t2starw_series_names + t1w_series_names:
                    details = {}
                    details['subject'] = subject
                    details['session'] = session
                    details['series_type'] = None
                    if json_data['SeriesDescription'] in t2starw_series_names:
                        details['series_type'] = 't2starw'
                    elif json_data['SeriesDescription'] in t1w_series_names:
                        details['series_type'] = 't1w'
                    details['series_num'] = json_data['SeriesNumber']
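                    # DICOM ImageType containing 'P' marks a phase image; anything else is treated as magnitude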
                    details['part_type'] = 'phase' if 'P' in json_data['ImageType'] else 'magnitude'
                    details['echo_time'] = json_data['EchoTime']
                    details['file_name'] = json_file.split('.')[0]
                    details['run_num'] = None
                    details['echo_num'] = None
                    details['num_echoes'] = None
                    details['new_name'] = None
                    session_details.append(details)
            session_details = sorted(session_details, key=lambda f: (f['subject'], f['session'], f['series_type'], f['series_num'], 0 if 'phase' in f['part_type'] else 1, f['echo_time']))
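            # sorted by subject, session, series type and series number, with phase parts before magnitude and echoes in ascending order of echo time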

            # update run numbers
            run_num = 1
            series_num = session_details[0]['series_num']
            series_type = session_details[0]['series_type']
            for i in range(len(session_details)):
                if session_details[i]['series_num'] != series_num:
                    if session_details[i]['series_type'] == 't2starw' and session_details[i-1]['part_type'] == 'phase':
                        run_num += 1
                    elif session_details[i]['series_type'] == 't1w' and session_details[i-1]['series_type'] == 't1w':
                        run_num += 1
                    elif session_details[i]['series_type'] != session_details[i-1]['series_type']:
                        run_num = 1

                    series_num = session_details[i]['series_num']
                    series_type = session_details[0]['series_type']
                session_details[i]['run_num'] = run_num

            # update echo numbers and number of echoes
            t2starw_details = [details for details in session_details if details['series_type'] == 't2starw']
            t2starw_run_nums = sorted(list(set(details['run_num'] for details in t2starw_details)))
            for run_num in t2starw_run_nums:
                echo_times = sorted(list(set([details['echo_time'] for details in t2starw_details if details['run_num'] == run_num])))
                num_echoes = len(echo_times)
                for details in t2starw_details:
                    if details['run_num'] == run_num:
                        details['num_echoes'] = num_echoes
                        details['echo_num'] = echo_times.index(details['echo_time']) + 1

            # update names
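            # t1w series use the BIDS T1w suffix; single-echo t2starw series use T2starw with a part entity; multi-echo series use MEGRE with echo and part entities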
            for details in session_details:
                if details['series_type'] == 't1w':
                    details['new_name'] = os.path.join(session_anat_folder, f"{clean(subject)}_{clean(session)}_run-{details['run_num']}_T1w")
                elif details['num_echoes'] == 1:
                    details['new_name'] = os.path.join(session_anat_folder, f"{clean(subject)}_{clean(session)}_run-{details['run_num']}_part-{details['part_type']}_T2starw")
                else:
                    details['new_name'] = os.path.join(session_anat_folder, f"{clean(subject)}_{clean(session)}_run-{details['run_num']}_echo-{details['echo_num']}_part-{details['part_type']}_MEGRE")

            # store session details
            all_session_details.extend(session_details)

    # if running interactively, show a summary of the renames prior to actioning
    if sys.__stdin__.isatty() and not auto_yes:
        print("Summary of identified files and proposed renames (following BIDS standard):")
        for f in all_session_details:
            print(f"{os.path.split(f['file_name'])[1]} \n\t -> {os.path.split(f['new_name'])[1]}")
        if input("Confirm renaming? (n for no): ").strip().lower() in ["n", "no"]:
            exit()

    # rename all files
    print("Renaming files...")
    for details in all_session_details:
        rename(details['file_name']+'.json', details['new_name']+'.json', always_show=auto_yes)
        rename(details['file_name']+'.nii', details['new_name']+'.nii', always_show=auto_yes)
    print("Finished!")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="QSMxT dicomConvert: Converts DICOM files to NIfTI/BIDS",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        'input_dir',
        help='Sorted DICOM directory generated using run_0_dicomSort.py of the format {subject}/{session}/{series}'
    )

    parser.add_argument(
        'output_dir',
        help='Output directory for converted NIfTIs'
    )

    parser.add_argument(
        '--use_patient_names',
        action='store_true',
        help='Use the PatientName rather than PatientID for subject folders'
    )

    parser.add_argument(
        '--use_session_dates',
        action='store_true',
        help='Use the StudyDate field rather than an incrementer for session IDs'
    )

    parser.add_argument(
        '--auto_yes',
        action='store_true',
        help='Force running non-interactively'
    )

    parser.add_argument(
        '--t2starw_series_patterns',
        default=['*t2starw*'],
        nargs='*',
        help='Patterns used to identify t2starw series for QSM from the DICOM SeriesDescription field'
    )

    parser.add_argument(
        '--t1w_series_patterns',
        default=['*t1w*'],
        nargs='*',
        help='Patterns used to identify t1w series for segmentation from the DICOM SeriesDescription field'
    )


    args = parser.parse_args()

    convert_to_nifti(
        input_dir=args.input_dir,
        output_dir=args.output_dir,
        t2starw_series_patterns=args.t2starw_series_patterns,
        t1w_series_patterns=args.t1w_series_patterns,
        auto_yes=args.auto_yes
    )
