-
Notifications
You must be signed in to change notification settings - Fork 34
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Pure dec enc with gz #143
Merged
Merged
Pure dec enc with gz #143
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit
Hold shift + click to select a range
6c54ca0
revise a decoder and encoder, being pure
hannesm c67f945
remove stuff
hannesm 9ccc73b
wip
hannesm ebabd3c
fix
hannesm ce9337b
proposed API
hannesm 50f6659
add filter
hannesm 1b4ae55
initial compiling tar_unix
hannesm 984ffe0
remove offset nonsense
hannesm 9c1c120
lwt-unix
hannesm 29d884e
further work, get tests a bit more up to speed
hannesm 281883b
more tests are working now
hannesm 60d6faa
revive transform test
hannesm 462063b
test tar_unix, use fold for list
hannesm 2b49b1f
document write_header
hannesm 2388f62
Purify fold and move it into Tar with a GADT, use it then for Tar_gz …
dinosaure 8b308a9
Keep the bind as is and Tar_gz does not require the run function (/cc…
dinosaure 14681fe
Implement Tar_gz.gzipped : _ Tar.t -> _ Tar.t
reynir d5ad1df
Fix the otar binary
dinosaure c7c81d2
Implement the high kind polymorphism to fix the lwt_unix layer
dinosaure 906d6dc
Add a comment to explain the hkp trick
dinosaure b8b4ff6
Minor: qualify opens, fix tests
reynir 0cfd771
Partially implement tar_eio, stub out remainder
reynir c24cd1b
Seek returns unit, improve documentation
robur-team 576dcff
Remove [`Msg of string] from Tar_unix.decode_error
robur-team b1c10d0
Document Tar.fold
reynir 890c1fe
Fixups
reynir File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -14,15 +14,10 @@ | |||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||||||
*) | ||||||
|
||||||
module type READER = sig | ||||||
type in_channel | ||||||
type 'a io | ||||||
val read : in_channel -> bytes -> int io | ||||||
end | ||||||
|
||||||
external ba_get_int32_ne : De.bigstring -> int -> int32 = "%caml_bigstring_get32" | ||||||
external ba_set_int32_ne : De.bigstring -> int -> int32 -> unit = "%caml_bigstring_set32" | ||||||
|
||||||
(* | ||||||
let bigstring_to_string ?(off= 0) ?len ba = | ||||||
let len = match len with | ||||||
| Some len -> len | ||||||
|
@@ -41,6 +36,7 @@ let bigstring_to_string ?(off= 0) ?len ba = | |||||
Bytes.set res i v | ||||||
done; | ||||||
Bytes.unsafe_to_string res | ||||||
*) | ||||||
|
||||||
let bigstring_blit_string src ~src_off dst ~dst_off ~len = | ||||||
let len0 = len land 3 in | ||||||
|
@@ -71,6 +67,89 @@ let bigstring_blit_bytes src ~src_off dst ~dst_off ~len = | |||||
Bytes.set dst (dst_off + i) v | ||||||
done | ||||||
|
||||||
type decoder = | ||||||
{ mutable gz : Gz.Inf.decoder | ||||||
; ic_buffer : De.bigstring | ||||||
; oc_buffer : De.bigstring | ||||||
; tp_length : int | ||||||
; mutable pos : int } | ||||||
|
||||||
let really_read_through_gz | ||||||
: decoder -> bytes -> (unit, 'err) Tar.t | ||||||
= fun ({ ic_buffer; oc_buffer; tp_length; _ } as state) res -> | ||||||
let open Tar in | ||||||
let rec until_full_or_end gz (res, res_off, res_len) = | ||||||
match Gz.Inf.decode gz with | ||||||
| `Flush gz -> | ||||||
let max = De.bigstring_length oc_buffer - Gz.Inf.dst_rem gz in | ||||||
let len = min res_len max in | ||||||
bigstring_blit_bytes oc_buffer ~src_off:0 res ~dst_off:res_off ~len; | ||||||
if len < max | ||||||
then ( state.pos <- len | ||||||
; state.gz <- gz | ||||||
; return (Ok ()) ) | ||||||
else until_full_or_end (Gz.Inf.flush gz) (res, res_off + len, res_len - len) | ||||||
| `End gz -> | ||||||
let max = De.bigstring_length oc_buffer - Gz.Inf.dst_rem gz in | ||||||
let len = min res_len max in | ||||||
bigstring_blit_bytes oc_buffer ~src_off:0 res ~dst_off:res_off ~len; | ||||||
if res_len > len | ||||||
then return (Error `Eof) | ||||||
else ( state.pos <- len | ||||||
; state.gz <- gz | ||||||
; return (Ok ()) ) | ||||||
| `Await gz -> | ||||||
let* tp_buffer = Tar.read tp_length in | ||||||
let len = String.length tp_buffer in | ||||||
bigstring_blit_string tp_buffer ~src_off:0 ic_buffer ~dst_off:0 ~len; | ||||||
let gz = Gz.Inf.src gz ic_buffer 0 len in | ||||||
until_full_or_end gz (res, res_off, res_len) | ||||||
| `Malformed err -> return (Error (`Gz err)) in | ||||||
let max = (De.bigstring_length oc_buffer - Gz.Inf.dst_rem state.gz) - state.pos in | ||||||
let len = min (Bytes.length res) max in | ||||||
bigstring_blit_bytes oc_buffer ~src_off:state.pos res ~dst_off:0 ~len; | ||||||
if len < max | ||||||
then ( state.pos <- state.pos + len | ||||||
; return (Ok ()) ) | ||||||
else until_full_or_end (Gz.Inf.flush state.gz) (res, len, Bytes.length res - len) | ||||||
|
||||||
let really_read_through_gz decoder len = | ||||||
let open Tar in | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here we locally open
Suggested change
|
||||||
let res = Bytes.create len in | ||||||
let* () = really_read_through_gz decoder res in | ||||||
Tar.return (Ok (Bytes.unsafe_to_string res)) | ||||||
|
||||||
type error = [ `Fatal of Tar.error | `Eof | `Gz of string ] | ||||||
|
||||||
let seek_through_gz : decoder -> int -> (int, [> error ]) Tar.t = fun state len -> | ||||||
let open Tar in | ||||||
let* _buf = really_read_through_gz state len in | ||||||
Tar.return (Ok 0 (* XXX(dinosaure): actually, [fold] ignores the result. *)) | ||||||
|
||||||
type 'err run = { run : 'a 'err. ('a, 'err) Tar.t -> ('a, 'err) result } [@@unboxed] | ||||||
|
||||||
let fold_with_gz | ||||||
: run:[> error ] run -> _ -> _ -> _ | ||||||
= fun ~run:{ run } f init -> | ||||||
let rec go : type a. decoder -> (a, [> error ] as 'err) Tar.t -> (a, 'err) Tar.t = fun decoder -> function | ||||||
| Tar.Really_read len -> really_read_through_gz decoder len | ||||||
reynir marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| Tar.Read _len -> assert false (* XXX(dinosaure): actually does not emit [Tar.Read]. *) | ||||||
| Tar.Seek len -> seek_through_gz decoder len | ||||||
| Tar.Return v -> Tar.return v | ||||||
| Tar.Bind (x, f) -> | ||||||
match run x with | ||||||
| Ok value -> go decoder (f value) | ||||||
| Error _ as err -> Tar.return err in | ||||||
let decoder = | ||||||
let oc_buffer = De.bigstring_create 0x1000 in | ||||||
{ gz= Gz.Inf.decoder `Manual ~o:oc_buffer | ||||||
; oc_buffer | ||||||
; ic_buffer= De.bigstring_create 0x1000 | ||||||
; tp_length= 0x1000 | ||||||
; pos= 0 } in | ||||||
go decoder (Tar.fold f init) | ||||||
|
||||||
(* | ||||||
module Make | ||||||
(Async : Tar.ASYNC) | ||||||
(Writer : Tar.WRITER with type 'a io = 'a Async.t) | ||||||
|
@@ -108,75 +187,6 @@ module Make | |||||
go gz (str, 0, String.length str) | ||||||
end | ||||||
|
||||||
module Gz_reader = struct | ||||||
type in_channel = | ||||||
{ mutable gz : Gz.Inf.decoder | ||||||
; ic_buffer : De.bigstring | ||||||
; oc_buffer : De.bigstring | ||||||
; tp_buffer : bytes | ||||||
; in_channel : Reader.in_channel | ||||||
; mutable pos : int } | ||||||
|
||||||
type 'a io = 'a Async.t | ||||||
|
||||||
let really_read | ||||||
: in_channel -> bytes -> unit io | ||||||
= fun ({ ic_buffer; oc_buffer; in_channel; tp_buffer; _ } as state) res -> | ||||||
let rec until_full_or_end gz (res, res_off, res_len) = | ||||||
match Gz.Inf.decode gz with | ||||||
| `Flush gz -> | ||||||
let max = De.bigstring_length oc_buffer - Gz.Inf.dst_rem gz in | ||||||
let len = min res_len max in | ||||||
bigstring_blit_bytes oc_buffer ~src_off:0 res ~dst_off:res_off ~len; | ||||||
if len < max | ||||||
then ( state.pos <- len | ||||||
; state.gz <- gz | ||||||
; Async.return () ) | ||||||
else until_full_or_end (Gz.Inf.flush gz) (res, res_off + len, res_len - len) | ||||||
| `End gz -> | ||||||
let max = De.bigstring_length oc_buffer - Gz.Inf.dst_rem gz in | ||||||
let len = min res_len max in | ||||||
bigstring_blit_bytes oc_buffer ~src_off:0 res ~dst_off:res_off ~len; | ||||||
if res_len > len | ||||||
then raise End_of_file | ||||||
else ( state.pos <- len | ||||||
; state.gz <- gz | ||||||
; Async.return () ) | ||||||
| `Await gz -> | ||||||
Reader.read in_channel tp_buffer >>= fun len -> | ||||||
bigstring_blit_string (Bytes.unsafe_to_string tp_buffer) ~src_off:0 ic_buffer ~dst_off:0 ~len; | ||||||
let gz = Gz.Inf.src gz ic_buffer 0 len in | ||||||
until_full_or_end gz (res, res_off, res_len) | ||||||
| `Malformed err -> failwith ("gzip: " ^ err) in | ||||||
let max = (De.bigstring_length oc_buffer - Gz.Inf.dst_rem state.gz) - state.pos in | ||||||
let len = min (Bytes.length res) max in | ||||||
bigstring_blit_bytes oc_buffer ~src_off:state.pos res ~dst_off:0 ~len; | ||||||
if len < max | ||||||
then ( state.pos <- state.pos + len | ||||||
; Async.return () ) | ||||||
else until_full_or_end (Gz.Inf.flush state.gz) (res, len, Bytes.length res - len) | ||||||
|
||||||
let skip : in_channel -> int -> unit io = fun state len -> | ||||||
let res = Bytes.create len in | ||||||
really_read state res | ||||||
end | ||||||
|
||||||
module HeaderWriter = Tar.HeaderWriter (Async) (Gz_writer) | ||||||
module HeaderReader = Tar.HeaderReader (Async) (Gz_reader) | ||||||
|
||||||
type in_channel = Gz_reader.in_channel | ||||||
|
||||||
let of_in_channel ~internal:oc_buffer in_channel = | ||||||
{ Gz_reader.gz= Gz.Inf.decoder `Manual ~o:oc_buffer | ||||||
; oc_buffer | ||||||
; ic_buffer= De.bigstring_create 0x1000 | ||||||
; tp_buffer= Bytes.create 0x1000 | ||||||
; in_channel | ||||||
; pos= 0 } | ||||||
|
||||||
let really_read = Gz_reader.really_read | ||||||
let skip = Gz_reader.skip | ||||||
|
||||||
type out_channel = Gz_writer.out_channel | ||||||
|
||||||
let of_out_channel ?bits:(w_bits= 15) ?q:(q_len= 0x1000) ~level ~mtime os out_channel = | ||||||
|
@@ -230,3 +240,4 @@ module Make | |||||
| `End _gz -> Async.return () in | ||||||
until_end (Gz.Def.src state.gz De.bigstring_empty 0 0) | ||||||
end | ||||||
*) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For
Bind (x, f)
the valuesx
andf
share the same'err
. This is likely just fine, but it could maybe be generalized (though I don't think it's worth it going through those hoops).