From ba28a657eaf61ca6d46efc1f660f60fcef67d8ad Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Thu, 1 Aug 2019 15:07:18 -0400 Subject: [PATCH 1/4] finish_agrument_non_unique_CDS_IDS --- gff3tool/lib/gff3_ID_generator.py | 46 +++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/gff3tool/lib/gff3_ID_generator.py b/gff3tool/lib/gff3_ID_generator.py index a4f824c..e4dea85 100644 --- a/gff3tool/lib/gff3_ID_generator.py +++ b/gff3tool/lib/gff3_ID_generator.py @@ -180,6 +180,7 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit 'missing': [] } ID_order = [] + Copy_ID_dict=[] roots = list() logger.info('Generate new ID for features in (%s)', in_gff) for line in gff3.lines: @@ -188,10 +189,14 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit if uuid_on: newID = str(uuid.uuid1()) if 'ID' in line['attributes']: + #print line['attributes'] if line['attributes']['ID'] in ID_dict: + #print line['attributes']['ID'] ID_dict[line['attributes']['ID']].append(newID) if alias: line['attributes']['Alias'] = line['attributes']['ID'] + if line['attributes']['ID'] not in Copy_ID_dict: + Copy_ID_dict.append(line['attributes']['ID']) line['attributes']['ID'] = newID else: ID_dict[line['attributes']['ID']] = [newID] @@ -202,6 +207,7 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit else: ID_dict['missing'].append(newID) line['attributes']['ID'] = newID + if 'Parent' in line['attributes']: for index, parent in enumerate(line['attributes']['Parent']): if parent in ID_dict: @@ -217,13 +223,15 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit except KeyError: logger.warning('[Missing Attributes] Line (%s)', str(line['line_index'] + 1)) IDnumber = 0 + #print Copy_ID_dict + #gene parent for root in roots: newID = idgenerator(prefix, IDnumber, digitlen) IDnumber = newID['maxnum'] ID_dict[root['attributes']['ID']] = [newID['ID']] ID_order.append(root['attributes']['ID']) if alias: - root['attributes']['Alias'] = root['attributes']['ID'] + root['attributes']['Alias'] = root['attributes']['ID'] root['attributes']['ID'] = newID['ID'] children = root['children'] alphabets = list(string.ascii_uppercase) @@ -231,7 +239,6 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit for index, parent in enumerate(child['attributes']['Parent']): if parent in ID_dict: child['attributes']['Parent'][index] = newID['ID'] - newcID = '%s-R%s' % (newID['ID'], alphabets.pop(0)) ID_dict[child['attributes']['ID']] = [newcID] ID_order.append(child['attributes']['ID']) @@ -271,6 +278,7 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit descend['attributes']['ID'] = newdID['ID'] flag = True descend['attributes']['Parent'][index] = ID_dict[parent][0] + if merge_report and out_merge_report: logger.info('Update report file generated by gff3_merge program with new IDs.') with open(out_merge_report, 'w') as out_f: @@ -287,17 +295,48 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit for log_line in log_lines: out_f.write('\t'.join(log_line) + '\n') logger.info('Write out gff3 file: (%s)', out_gff) + + #cds_share_id + cds_parent=[] + Copy_ID_dict_value=[] + listid=[] + if args.type: + for line in gff3.lines: + if line['type']=='CDS': + for i in line['attributes']['Parent']: + if i not in cds_parent: + cds_parent.append(i) + + #parent_feature + for z in cds_parent: + newID2 = str(uuid.uuid4()) + if z not in Copy_ID_dict_value: + Copy_ID_dict_value.append(z) + keyparent={'Parent':z,'ID':[newID2]} + #listid will store all newid in order + listid.append(keyparent['ID']) + #overwrite gff3 file + line['attributes'].update(keyparent) + + cds_update_part = {} + for k,v in zip(Copy_ID_dict, listid): + cds_update_part.setdefault(k,v) + #cds_update_part.setdefault(k, []).append(v) + + #overwrite report file + ID_dict.update(cds_update_part) + write_gff3(gff3, out_gff) if report: ID_order.append('missing') logger.info('Generate a report of comparison between old and new IDs: (%s)', report) out_line = 'Old_ID\tNewID' out_report.write(out_line+'\n') + for key in ID_order: for value in ID_dict[key]: out_line = '%s\t%s' % (key, value) out_report.write(out_line+'\n') - out_report.close() if __name__ == '__main__': @@ -318,6 +357,7 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit parser.add_argument('-r', '--report', type=str, help='Generate a table of comparison between old and new IDs.') parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__) parser.add_argument('-a', '--alias', action='store_true', default=False, help='Specify this argument if you want old IDs to be retained in the gff3 file as an Alias attribute') + parser.add_argument('-t','--type', type=str) args = parser.parse_args() main(in_gff=args.gff, merge_report=args.merge_report, out_merge_report=args.out_merge_report, out_gff=args.output_gff, uuid_on=args.universally_unique_identifier, prefix=args.idprefix, digitlen=args.digitlen, report=args.report, alias=args.alias) From 51ecc7d0344d3dd689392a6d91f6d3de466448d8 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Thu, 1 Aug 2019 15:22:46 -0400 Subject: [PATCH 2/4] remove_whitespace_for_codacy --- gff3tool/lib/gff3_ID_generator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gff3tool/lib/gff3_ID_generator.py b/gff3tool/lib/gff3_ID_generator.py index e4dea85..29ad511 100644 --- a/gff3tool/lib/gff3_ID_generator.py +++ b/gff3tool/lib/gff3_ID_generator.py @@ -196,7 +196,7 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit if alias: line['attributes']['Alias'] = line['attributes']['ID'] if line['attributes']['ID'] not in Copy_ID_dict: - Copy_ID_dict.append(line['attributes']['ID']) + Copy_ID_dict.append(line['attributes']['ID']) line['attributes']['ID'] = newID else: ID_dict[line['attributes']['ID']] = [newID] @@ -231,7 +231,7 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit ID_dict[root['attributes']['ID']] = [newID['ID']] ID_order.append(root['attributes']['ID']) if alias: - root['attributes']['Alias'] = root['attributes']['ID'] + root['attributes']['Alias'] = root['attributes']['ID'] root['attributes']['ID'] = newID['ID'] children = root['children'] alphabets = list(string.ascii_uppercase) @@ -306,9 +306,9 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit for i in line['attributes']['Parent']: if i not in cds_parent: cds_parent.append(i) - + #parent_feature - for z in cds_parent: + for z in cds_parent: newID2 = str(uuid.uuid4()) if z not in Copy_ID_dict_value: Copy_ID_dict_value.append(z) From 0a79ecd6e6b8296f02f7c0cbc24635978e1b9043 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Thu, 1 Aug 2019 15:27:45 -0400 Subject: [PATCH 3/4] remove_whitespace_for_codacy_2 --- gff3tool/lib/gff3_ID_generator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gff3tool/lib/gff3_ID_generator.py b/gff3tool/lib/gff3_ID_generator.py index 29ad511..fdd97df 100644 --- a/gff3tool/lib/gff3_ID_generator.py +++ b/gff3tool/lib/gff3_ID_generator.py @@ -196,7 +196,7 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit if alias: line['attributes']['Alias'] = line['attributes']['ID'] if line['attributes']['ID'] not in Copy_ID_dict: - Copy_ID_dict.append(line['attributes']['ID']) + Copy_ID_dict.append(line['attributes']['ID']) line['attributes']['ID'] = newID else: ID_dict[line['attributes']['ID']] = [newID] @@ -231,7 +231,7 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit ID_dict[root['attributes']['ID']] = [newID['ID']] ID_order.append(root['attributes']['ID']) if alias: - root['attributes']['Alias'] = root['attributes']['ID'] + root['attributes']['Alias'] = root['attributes']['ID'] root['attributes']['ID'] = newID['ID'] children = root['children'] alphabets = list(string.ascii_uppercase) @@ -306,7 +306,7 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit for i in line['attributes']['Parent']: if i not in cds_parent: cds_parent.append(i) - + #parent_feature for z in cds_parent: newID2 = str(uuid.uuid4()) From 949b4cc3e214826963cee2d6e5bfb074dcf31ecc Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Thu, 1 Aug 2019 15:32:07 -0400 Subject: [PATCH 4/4] remove_whitespace_for_codacy_3 --- gff3tool/lib/gff3_ID_generator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gff3tool/lib/gff3_ID_generator.py b/gff3tool/lib/gff3_ID_generator.py index fdd97df..b4d5328 100644 --- a/gff3tool/lib/gff3_ID_generator.py +++ b/gff3tool/lib/gff3_ID_generator.py @@ -196,7 +196,7 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit if alias: line['attributes']['Alias'] = line['attributes']['ID'] if line['attributes']['ID'] not in Copy_ID_dict: - Copy_ID_dict.append(line['attributes']['ID']) + Copy_ID_dict.append(line['attributes']['ID']) line['attributes']['ID'] = newID else: ID_dict[line['attributes']['ID']] = [newID] @@ -207,7 +207,7 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit else: ID_dict['missing'].append(newID) line['attributes']['ID'] = newID - + if 'Parent' in line['attributes']: for index, parent in enumerate(line['attributes']['Parent']): if parent in ID_dict: