diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..163eb75 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,9 @@ +root = true + +[*.cr] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +indent_style = space +indent_size = 2 +trim_trailing_whitespace = true diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0bbd4a9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +/docs/ +/lib/ +/bin/ +/.shards/ +*.dwarf + +# Libraries don't need dependency lock +# Dependencies will be locked in applications that use them +/shard.lock diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..85e8c42 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Robert Schulze + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..e3e7272 --- /dev/null +++ b/README.md @@ -0,0 +1,74 @@ +# GDBMish + +Convert Crystal data structures into the `gdbm_dump` ASCII format. 
+ +Citing [gdbm](https://git.gnu.org.ua/gdbm.git/tree/NOTE-WARNING): +> Gdbm files have never been `portable' between different operating systems, +> system architectures, or potentially even different compilers. Differences +> in byte order, the size of file offsets, and even structure packing make +> gdbm files non-portable. +> +> Therefore, if you intend to send your database to somebody over the wire, +> please dump it into a portable format using gdbm_dump and send the resulting +> file instead. The receiving party will be able to recreate the database from +> the dump using the gdbm_load command. + +GDBMish does that by reimplementing the `gdbm_dump` ASCII format without compiling against `gdbm`. + +## Installation + +1. Add the dependency to your `shard.yml`: + + ```yaml + dependencies: + gdbmish: + github: fnordfish/gdbmish + ``` + +2. Run `shards install` + +## Usage + +```crystal +require "gdbmish" +``` + +```crystal +# Get dump as String +string = Gdbmish::Dump.ascii({"key1" => "value", "key2" => value}) + +# Write directly into File (or any other IO) +File.open("my_db.dump", "w") do |file| + Gdbmish::Dump.ascii({"key1" => "value", "key2" => value}, file) +end + +# Provide an original filename +Gdbmish::Dump.ascii(data, file: "my.db") + +# Provide an original filename and file permissions +Gdbmish::Dump.ascii(data, file: "my.db", uid: "1000", gid: "1000", mode: 0o600) +``` + +## Development + +TODO: Write development instructions here + +## Limitations + +* Currently only supports the ASCII format and not the Binary format +* Currently requires a `Hash` or `NamedTuple` with `String` keys and values + + it would be nice to provide a "consumer" style API for dumping larger data sets +* Currently only supports creating a dump + + it would be nice to also read dumps + +## Contributing + +1. Fork it (<https://github.com/fnordfish/gdbmish/fork>) +2. Create your feature branch (`git checkout -b my-new-feature`) +3. Commit your changes (`git commit -am 'Add some feature'`) +4. 
Push to the branch (`git push origin my-new-feature`) +5. Create a new Pull Request + +## Contributors + +- [Robert Schulze](https://github.com/fnordfish) - creator and maintainer diff --git a/shard.yml b/shard.yml new file mode 100644 index 0000000..525a7da --- /dev/null +++ b/shard.yml @@ -0,0 +1,11 @@ +name: gdbmish +description: Convert Crystal data structures into the `gdbm_dump` ASCII format. + +version: 0.1.0 + +authors: + - Robert Schulze + +crystal: "~> 1.0" + +license: MIT diff --git a/spec/fixtures/create_test_db.rb b/spec/fixtures/create_test_db.rb new file mode 100644 index 0000000..e6fccce --- /dev/null +++ b/spec/fixtures/create_test_db.rb @@ -0,0 +1,29 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +# Using the Ruby GDBM bindings as a convenient way of creating +# some test data + +require "bundler" +require "bundler/inline" + +Bundler.settings.temporary(frozen: false, deployment: false) do + gemfile do + source "https://rubygems.org" + platform "ruby" do + gem 'gdbm' + end + end +end + +require 'gdbm' + +data = { + "föö" => "bää\n🤦‍♂️", + "foo2" => "bar2", + "foo" => ("bar-"*128) +} + +GDBM.open("test.db", 0666, GDBM::NEWDB) do |db| + data.each { |k,v| db[k] = v } +end \ No newline at end of file diff --git a/spec/fixtures/test.db b/spec/fixtures/test.db new file mode 100644 index 0000000..1d9819a Binary files /dev/null and b/spec/fixtures/test.db differ diff --git a/spec/fixtures/test.dump b/spec/fixtures/test.dump new file mode 100644 index 0000000..840cc6f --- /dev/null +++ b/spec/fixtures/test.dump @@ -0,0 +1,28 @@ +# GDBM dump file created by GDBM version 1.23. 
04/02/2022 (built Feb 5 2022 18:08:24) on Mon Jun 13 16:53:10 2022 +#:version=1.1 +#:file=test.db +#:uid=501,user=robertschulze,gid=20,group=staff,mode=600 +#:format=standard +# End of header +#:len=5 +ZsO2w7Y= +#:len=19 +YsOkw6QK8J+kpuKAjeKZgu+4jw== +#:len=4 +Zm9vMg== +#:len=4 +YmFyMg== +#:len=3 +Zm9v +#:len=512 +YmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1i +YXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJh +ci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFy +LWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXIt +YmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1i +YXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJh +ci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFy +LWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXIt +YmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci1iYXItYmFyLWJhci0= +#:count=3 +# End of data diff --git a/spec/gdbm_dumpish_spec.cr b/spec/gdbm_dumpish_spec.cr new file mode 100644 index 0000000..b8dad25 --- /dev/null +++ b/spec/gdbm_dumpish_spec.cr @@ -0,0 +1,57 @@ +require "./spec_helper" + +data = { + föö: "bää\n🤦‍♂️", + foo2: "bar2", + foo: ("bar-"*128), +} +dumped_without_header = File.read("spec/fixtures/test.dump").split("# End of header\n")[1] + +describe Gdbmish do + describe Gdbmish::Dump do + it "Dumps NamedTuple" do + Gdbmish::Dump.ascii(data).should end_with(dumped_without_header) + end + + it "Dumps Hash" do + Gdbmish::Dump.ascii(data.to_h).should end_with(dumped_without_header) + end + + it "Dumps into an IO" do + io = IO::Memory.new + io << "# my open IO" + + Gdbmish::Dump.ascii(data, io) + str = io.to_s + str.should start_with("# my open IO") + str.should end_with(dumped_without_header) + end + + it "Dumps filename and permissions" do + str = Gdbmish::Dump.ascii(data, file: "test.db", uid: "501", user: 
"robertschulze", gid: "20", group: "staff", mode: 0o600) + str.should contain("#:file=test.db") + str.should contain("#:uid=501,user=robertschulze,gid=20,group=staff,mode=600") + end + + it "Dumps filename and partial permissions" do + str = Gdbmish::Dump.ascii(data, file: "test.db", uid: "501", gid: "20", mode: 0o600) + str.should contain("#:file=test.db") + str.should contain("#:uid=501,gid=20,mode=600") + end + + it "Dumps skips permissions if filename is missing" do + str = Gdbmish::Dump.ascii(data, uid: "501", gid: "20", mode: 0o600) + str.should_not contain("#:file=test.db") + str.should_not contain("uid=501") + str.should_not contain("gid=20") + str.should_not contain("mode=600") + end + + it "keeps lines at GDBM_MAX_DUMP_LINE_LEN" do + data.values.any? { |v| v.size > Gdbmish::Dump::GDBM_MAX_DUMP_LINE_LEN }.should be_true + Gdbmish::Dump.ascii(data).split("# End of header\n")[1].each_line do |line| + line.size.should be <= Gdbmish::Dump::GDBM_MAX_DUMP_LINE_LEN + end + end + end +end diff --git a/spec/spec_helper.cr b/spec/spec_helper.cr new file mode 100644 index 0000000..bf7a2bc --- /dev/null +++ b/spec/spec_helper.cr @@ -0,0 +1,2 @@ +require "spec" +require "../src/gdbmish" diff --git a/src/gdbmish.cr b/src/gdbmish.cr new file mode 100644 index 0000000..5383a16 --- /dev/null +++ b/src/gdbmish.cr @@ -0,0 +1,6 @@ +require "./gdbmish/*" + +# See `Gdbmish::Dump` for generating dumps from data +module Gdbmish + VERSION = {{ "#{system("shards version").strip}" }} +end diff --git a/src/gdbmish/dump.cr b/src/gdbmish/dump.cr new file mode 100644 index 0000000..aba053c --- /dev/null +++ b/src/gdbmish/dump.cr @@ -0,0 +1,87 @@ +require "base64" + +module Gdbmish + module Dump + # GDBM wraps base64 strings at 76 encoded characters per line (the limit defined by RFC 2045), not at 60. + # See [gdbmdefs.h](https://git.gnu.org.ua/gdbm.git/tree/src/gdbmdefs.h) + GDBM_MAX_DUMP_LINE_LEN = 76 + + # Dump the given data in standard ASCII format into a provided `IO`. 
+ # + # Dumping file information is optional. + # * *uid*, *user*, *gid*, *group* and *mode* will only be used when *file* is given + # * *user* will only be used when *uid* is given + # * *group* will only be used when *gid* is given + def self.ascii( + data : (Hash | NamedTuple), + io : (IO), + file : String? = nil, + uid : String? = nil, + user : String? = nil, + gid : String? = nil, + group : String? = nil, + mode : Int32? = nil + ) + io.printf("# GDBM dump file created by GDBMish version %s on %s\n", Gdbmish::VERSION, Time.local.to_rfc2822) + io.puts("#:version=1.1") + + if file + io.printf("#:file=%s\n", file) + l = [] of String + + if uid + l << sprintf("uid=%d", uid) + l << sprintf("user=%s", user) if user + end + + if gid + l << sprintf("gid=%d", gid) + l << sprintf("group=%s", group) if group + end + + l << sprintf("mode=%03o", mode & 0o777) if mode + + unless l.empty? + io << "#:" + io.puts(l.join(",")) + end + end + + io.puts("#:format=standard") + io.puts("# End of header") + + data.each do |k, v| + io << ascii_dump_datum(k.to_s) + io << ascii_dump_datum(v.to_s) + end + + io.printf("#:count=%d\n", data.size) + io.puts("# End of data") + end + + # Like `ascii` but builds a new `String` + def self.ascii(data : (Hash | NamedTuple), **options) : String + String.build do |str| + self.ascii( + data, + str, + **options + ) + end + end + + private def self.ascii_dump_datum(datum : String) : String + String.build do |str| + str.printf("#:len=%d\n", datum.bytesize) + str.puts(Base64.strict_encode(datum).try do |enc| + if enc.size > GDBM_MAX_DUMP_LINE_LEN + slices = enc.each_char.each_slice(GDBM_MAX_DUMP_LINE_LEN) + slices.map(&.join).join("\n") + else + enc + end + end) + end + end + end +end