forked from GrinnellCollegeLibraries/typooftheday
-
Notifications
You must be signed in to change notification settings - Fork 0
/
editrecord.py
105 lines (86 loc) · 2.89 KB
/
editrecord.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import re
import sys
import string
from pymarc import Record, Field, MARCReader, MARCWriter
#well, since the API still isn't out yet...
does_the_API_exist_yet = "no"
if does_the_API_exist_yet == "no":
sys.exit("Sorry, still waiting on that API...")
#define variables from command line entries from index.php
word = sys.argv[1]
wrong_word = sys.argv[2]
search_type = sys.argv[3]
if search_type == 'full':
wrong_word_rx = '(^|\s)' + wrong_word + '\\s'
elif search_type == 'partial':
wrong_word_rx = '(^|\s)' + wrong_word + '[\w]'
else:
sys.exit("Sorry, missing search type var")
###define functions ###
def subfield_to_string(sfv):
'''takes the subfield value (sfv) and does the following:
- convert to string
- strip out brackets'''
sfv = str(sfv)
sfv = sfv.rstrip('\']')
sfv = sfv.lstrip('[\'')
return sfv
def fix_245_misspelling(sfv,word,subfield,title):
'''fixes misspelling in 245 field.
Recreate 245 field and delete old one after fix'''
if re.search(wrong_word_rx, sfv):
title_fix = sfv.replace(wrong_word, word)
for field in title:
marc['245'][subfield] = title_fix
else:
pass
### end functions ###
records = MARCReader(open(sys.argv[4]))
for marc in records:
#get the subfield a, b of the 245 field
title_245 = marc.get_fields('245')
for field in title_245:
title_a_raw = field.get_subfields('a')
title_b_raw = field.get_subfields('b')
title_a_raw = subfield_to_string(title_a_raw)
title_b_raw = subfield_to_string(title_b_raw)
#check to see if an RDA record that has the 246 $i note and has the misspelling in 245
#if so, move on to the next record
#thanks to @zemkat for the whack to the head w/a clue bat about this one
title_246 = marc.get_fields('246')
rda = False
for field in title_246:
rda_246 = field.get_subfields('i')
rda_246 = str(rda_246)
rda_246 = rda_246.lower()
if rda_246.find('title should read') and not (re.search(wrong_word_rx, title_a_raw) and re.search(wrong_word_rx, title_b_raw)):
rda = True
else:
rda = False
if rda:
continue #skip record, go onto next record
#time to fix that pesky misspelling...
fix_245_misspelling(title_a_raw,word,'a',title_245)
fix_245_misspelling(title_b_raw,word,'b',title_245)
#get the bib record from the 907 field prior to deletion
n = marc.get_fields('907')
for field in n:
bib_rec_num_raw = field.get_subfields('a')
bib_rec_num = subfield_to_string(bib_rec_num_raw)
#add 949 local field for overlay of bib record and creation of order record when record is uploaded into Millennium
marc.add_field(
Field(
tag = '949',
indicators = [' ',' '],
subfields = [
'a', '*recs-b;ov-%s;' %(bib_rec_num)
]))
#delete 907, 998, 910, 945 fields
for f in marc.get_fields('907', '998', '910', '945'):
if f['a'] != '':
marc.remove_field(f)
#append record to a generic file.dat file
writer = MARCWriter(file(word+'.dat','a'))
writer.write(marc)
#closes .dat file
writer.close()