-
Notifications
You must be signed in to change notification settings - Fork 2
/
extract.py
219 lines (200 loc) · 7.1 KB
/
extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
import string, io, sys, binascii, os
from time import gmtime, strftime
"""
USAGE
You need to have previously created a drive image of your DVR and stored it on a different
hard drive somewhere. Enter the path to the image below. Expect 10+ hours to process a 160GB drive.
"""
"""
USER-DEFINED VARIABLES
src_loc - Path and filename to an image file which is the raw contents of the drive.
Use a tool such as UFS Explorer Standard Recovery to backup the drive to an image
target_dir - Directory in which to output each sequential movie-block
"""
# Raw drive image that will be scanned.
src_loc = "E:\\work\\drive\\pioneer.img"
# Output directory. build_target() concatenates this with the file name
# as-is, so it has to end with a path separator.
target_dir = "E:\\work\\out\\"
"""
DETAILS HOW IT WORKS
This is a script designed to extract raw video/audio data off
of a Pioneer DVR-633H device. The 633H frequently corrupts the
boot portion of the drive, rendering the device inoperable.
Fortunately, the data is often intact and recoverable. The 633H
appears to store all video/audio data in 2KB streaming mpeg chunks.
They are stored (conveniently) sequentially on the disk. Each block
of mpeg media is prefixed with the following magic hex string
(mpeg identifier): 000001ba
Additionally, this sequence occurs regularly every 2KB of data.
In other words, the 633H stores all its content in 2KB mpeg
streaming chunks which can simply be concatenated to form the
full original media stream.
For whatever reason, the 633H sometimes stores "useless" chunks
with the mpeg identifier which are larger than 2KB. Inside these
larger chunks, many references to the magic hex string can be found
but the sizes are not 2KB and we define them as corrupted/useless.
This data should not be used. If it is included in the final
mpeg compilation, playback may end when that piece of video
is reached.
We take the following strategy:
1. Start at sector 0
2. Loop io stream over all sectors until the first occurrence
of the magic hex string is found
3. Continue iterating forward until the next magic hex string
is found. Subtract the location of the first from the second
to get a raw chunk size
4. If chunk size != 2KB, discard it and go to (2)
5. If chunk size == 2KB, write it to the output stream
6. Loop (2) through (5) until end of io stream
Additionally, we look for the new_file_hex string in each chunk. If it
exists, we start writing to a new output file. This may not be perfect (some video
tracks skip around) but gives you a manageable number of output videos
to concatenate together using simple video editing software (VirtualDub)
"""
# Hex form of the mpeg identifier that prefixes every chunk on the disk.
magic_hex = "000001ba"
# Hex form of the ASCII text "PIONEER"; its presence in a chunk hints that
# a new recording may start there.
new_file_hex = "50494f4e454552"
# Number used for the first output file.
new_file_counter = 0
# Byte offsets bounding the scan; an ending offset of 0 means "no limit".
starting_stream_loc = 0
ending_stream_loc = 0
# Size in bytes of a valid chunk (2KB).
chunk_size = 2 * 1024
def main(src_loc, target_dir, magic_hex, new_file_hex, new_file_counter, starting_stream_loc, ending_stream_loc, chunk_size):
    """
    Execution enters here, controls master loop over data.

    Walks the drive image from one magic_hex marker to the next. A span of
    exactly chunk_size bytes between two markers is appended to the current
    output file; a span of any other size is counted as a bad chunk and
    discarded. A new output file is started when new_file_hex is found near
    the chunk and the two bytes located 29 bytes before it are non-zero and
    differ from the pair seen at the previous such marker.

    src_loc             -- path of the raw drive image
    target_dir          -- output directory prefix handed to build_target
    magic_hex           -- hex string that marks the start of every chunk
    new_file_hex        -- hex string hinting at the start of a new recording
    new_file_counter    -- number used for the first output file
    starting_stream_loc -- byte offset at which scanning starts
    ending_stream_loc   -- stop once a marker past this offset has been
                           processed (0 means no limit)
    chunk_size          -- size in bytes of a valid chunk

    Always finishes by calling sys.exit(): status 0 after a clean run,
    status 1 when the extraction was aborted by an error.
    """
    # NOTE: print() is always given a single pre-formatted string so that
    # the output is identical under Python 2 and Python 3.
    time_format = "%Y-%m-%d %H:%M:%S"

    # init screen
    print("")
    print("Pioneer Extractor (633H)")
    print("Mike Knoop, 2011, knoopgroup.com")
    print("")
    print("the current time is %s" % strftime(time_format, gmtime()))
    print("")

    # open files
    sys.stdout.write("opening source and initial target... ")
    sys.stdout.flush()
    src = io.open(src_loc, 'rb')
    target = io.open(build_target(target_dir, str(new_file_counter)), 'wb')
    print("done!\n")

    # master loop state
    first_file_number = new_file_counter
    # two hex digits per byte; floor division keeps the offset an int
    marker_len = len(magic_hex) // 2
    last_tag = None
    good_chunk_count = 0
    bad_chunk_count = 0
    bad_chunk_size = 0
    target_size = 0
    exit_status = 0
    src_size = os.stat(src_loc).st_size
    print("total source size: %d bytes" % src_size)
    print("processing data...")

    try:
        try:
            # find first instance; when there is none the loop is skipped
            # instead of treating the False result as offset 0
            last_loc = find_string(src, magic_hex, starting_stream_loc)
            while last_loc is not False:
                # find next instance, searching just past the current marker
                new_loc = find_string(src, magic_hex, last_loc + marker_len)
                if new_loc is False:
                    break

                # calculate byte size of chunk
                size = new_loc - last_loc
                if size == chunk_size:
                    # check if new file first
                    marker_loc = find_string(src, new_file_hex, last_loc, chunk_size, chunk_size - 100, new_loc)
                    # A marker within the first 29 bytes of the image has no
                    # tag in front of it; skip it rather than seek to a
                    # negative offset.
                    if marker_loc is not False and marker_loc >= 29:
                        # perform a secondary check on the two bytes that
                        # sit 29 bytes ahead of the marker
                        src.seek(marker_loc - 29)
                        tag = src.read(2)
                        if tag != b"\x00\x00" and last_tag and tag != last_tag:
                            new_file_counter = new_file_counter + 1
                            target.close()
                            target = io.open(build_target(target_dir, str(new_file_counter)), 'wb')
                        last_tag = tag

                    src.seek(last_loc)
                    target.write(src.read(chunk_size))
                    target_size = target_size + chunk_size
                    good_chunk_count = good_chunk_count + 1
                    if good_chunk_count % 100000 == 0:
                        print("%d bytes" % target_size)
                else:
                    # the chunk is bad mpeg data. discard it
                    bad_chunk_count = bad_chunk_count + 1
                    bad_chunk_size = bad_chunk_size + size

                # prepare for next iteration
                last_loc = new_loc
                if ending_stream_loc != 0 and new_loc > ending_stream_loc:
                    print("ending because ending_stream_loc was reached.")
                    break
        except Exception as e:
            # Top-level boundary: report the error and still print the
            # statistics gathered so far, but do not claim success.
            exit_status = 1
            print("exception: %s" % (e,))
        else:
            print("success!\n")

        print("extracted %d chunks into %d files." % (good_chunk_count, new_file_counter - first_file_number + 1))
        print("found %d bad chunks with total size %d bytes\n" % (bad_chunk_count, bad_chunk_size))
    finally:
        # clean up; runs on Ctrl-C as well so the current output file is
        # always flushed and closed
        sys.stdout.write("cleaning up... ")
        target.close()
        src.close()
        print("clean exit.\n")

    # exit
    print("the current time is %s" % strftime(time_format, gmtime()))
    print("")
    sys.exit(exit_status)
def build_target(dir, filename):
    """
    Build the path of one output movie file.

    dir      -- output directory prefix, used verbatim (no path separator
                is inserted, so it must already end with one)
    filename -- file stem as a string

    Returns dir + '0' + filename + '.mpg'. The '0' is a fixed literal
    prefix, not zero-padding: a stem of "12" yields "...012.mpg".
    """
    # str.join replaces string.join(), which no longer exists in Python 3.
    return ''.join([dir, '0', filename, '.mpg'])
def find_string(src, val, start, buffer=2500, inc_buffer=2000, max_i=0):
    """
    Scan a seekable binary stream for the first occurrence of a byte pattern.

    src        -- seekable binary stream opened for reading
    val        -- pattern written as a hex string, e.g. "000001ba"
    start      -- absolute byte offset at which scanning begins
    buffer     -- number of bytes read per search window
    inc_buffer -- distance between the starts of consecutive windows; must
                  be positive, and buffer - inc_buffer should be at least
                  the pattern length minus one so that a match straddling
                  two windows is still seen
    max_i      -- when non-zero, give up after a window that started past
                  this offset was searched without a match. Windows that
                  start at or before max_i are read in full, so the offset
                  returned may lie beyond max_i.

    Returns the absolute byte offset of the first match, or False when the
    end of the stream (or the max_i limit) is reached first. Offset 0 is a
    valid match, so test the result with "is False" rather than truthiness.
    """
    # Search the raw bytes rather than their hex text: a hex-text search
    # also matches patterns that begin in the middle of a byte (odd nibble
    # offset) and would then report a wrong byte position.
    needle = binascii.unhexlify(val)
    i = start
    while True:
        src.seek(i)
        tmp = src.read(buffer)
        if len(tmp) == 0:
            # nothing left to read: end of stream
            return False
        idx = tmp.find(needle)
        if idx != -1:
            return i + idx
        if max_i != 0 and i > max_i:
            return False
        i = i + inc_buffer
# Run the extraction only when this file is executed as a script, so that
# importing the module does not start a scan (main ends with sys.exit()).
if __name__ == "__main__":
    main(src_loc, target_dir, magic_hex, new_file_hex, new_file_counter, starting_stream_loc, ending_stream_loc, chunk_size)