-
Notifications
You must be signed in to change notification settings - Fork 0
/
create_csv_cellranger.py
50 lines (38 loc) · 1.99 KB
/
create_csv_cellranger.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import os
import argparse
def extract_info_from_filename(filename, base_dir=None):
    """Derive the sample id and its nominal FASTQ path from a file name.

    The sample id is the text before the first underscore in ``filename``.

    Args:
        filename: File name such as ``"S1_L001_R1_001.fastq.gz"``.
        base_dir: Directory the sample id is appended to. When omitted, the
            module-level ``input_path`` variable (assigned by the
            command-line entry point) is used, so existing one-argument
            callers keep working.

    Returns:
        A tuple ``(sample_id, f"{base_dir}/{sample_id}")``, or
        ``(None, None)`` when ``filename`` contains no underscore.

    Raises:
        NameError: If ``base_dir`` is omitted and no module-level
            ``input_path`` has been defined (e.g. the module was imported
            rather than run as a script).
    """
    sample_id, separator, _ = filename.partition('_')
    if not separator:
        # No underscore: the name does not follow the <sample>_<rest> scheme.
        return None, None
    if base_dir is None:
        # Legacy fallback: the script entry point stores the CLI argument in
        # the global ``input_path``; only consulted when no explicit
        # directory was supplied.
        base_dir = input_path
    return sample_id, f"{base_dir}/{sample_id}"
# Site-specific defaults; override via the keyword arguments of
# generate_csv_content when running on a different system.
_DEFAULT_REFERENCE = "/hpcwork/rwth1209/reference_genome/human_GRCh38_10x"
_DEFAULT_PROBE_SET = (
    "/work/rwth1209/software/cellranger-7.1.0/probe_sets/"
    "Chromium_Human_Transcriptome_Probe_Set_v1.0.1_GRCh38-2020-A.csv"
)


def generate_csv_content(filename, path, *, reference=_DEFAULT_REFERENCE,
                         probe_set=_DEFAULT_PROBE_SET):
    """Build the text of a config CSV with [gene-expression] and [libraries] sections.

    Args:
        filename: Sample file name or id; the text before the first
            underscore is used as the ``fastq_id``.
        path: A path whose last ``/``-separated component is dropped; the
            remainder (with a trailing ``/``) is written as the ``fastqs``
            directory.
        reference: Value written for the ``reference`` key.
        probe_set: Value written for the ``probe-set`` key.

    Returns:
        The CSV content as a single string without a trailing newline.
    """
    # Extract the fastq_id from the filename
    fastq_id = filename.split('_')[0]

    # Drop the last component of 'path' for the [libraries] section. A path
    # without any '/' refers to the current directory, so emit './' rather
    # than '/' (which would point at the filesystem root).
    parent, separator, _ = path.rpartition('/')
    fastq_dir = f"{parent}/" if separator else "./"

    rows = [
        "[gene-expression]",
        f"reference,{reference}",
        f"probe-set,{probe_set}",
        "no-bam,true",
        "",
        "[libraries]",
        "fastq_id,fastqs,feature_types",
        f"{fastq_id},{fastq_dir},Gene Expression",
    ]
    return "\n".join(rows)
def main(input_path):
    """Write one config CSV per distinct sample prefix found under ``input_path``.

    Creates ``<input_path>/csv`` if needed, walks ``input_path`` recursively,
    and for every ``.fastq.gz`` file takes the text before the first
    underscore as the sample prefix. Only the first file encountered for a
    given prefix is processed; its CSV is written to
    ``<input_path>/csv/<prefix>.csv``.

    Args:
        input_path: Root directory that is searched for ``.fastq.gz`` files.
    """
    out_dir = os.path.join(input_path, "csv")
    os.makedirs(out_dir, exist_ok=True)

    seen_prefixes = set()
    for _root, _dirs, names in os.walk(input_path):
        for name in names:
            if not name.endswith(".fastq.gz"):
                continue

            prefix = name.split('_')[0]
            if prefix in seen_prefixes:
                # A CSV for this sample was already handled.
                continue
            seen_prefixes.add(prefix)

            fastq_id, fastq_path = extract_info_from_filename(name)
            if not (fastq_id and fastq_path):
                # Name could not be parsed into a usable sample id; skip it.
                continue

            csv_content = generate_csv_content(fastq_id, fastq_path)
            target = os.path.join(out_dir, f"{prefix}.csv")
            with open(target, 'w') as handle:
                handle.write(csv_content)
if __name__ == "__main__":
    # Command-line entry point: read the input directory and process it.
    cli = argparse.ArgumentParser(description="Generate CSV files from .fastq.gz files.")
    cli.add_argument("-i", "--input", required=True, help="Path to the .fastq.gz files")
    # Keep this module-level name: extract_info_from_filename reads the
    # global ``input_path`` when building the FASTQ path.
    input_path = cli.parse_args().input
    main(input_path)