diff --git a/.nojekyll b/.nojekyll
new file mode 100644
index 0000000..e69de29
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..72f1eba
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,85 @@
+# Pin the base image instead of tracking the floating `latest` tag.
+# The runtime packages installed further down (libicu66, libssl1.1) are
+# Ubuntu 20.04 package names, so the image is pinned to the Ubuntu 20.04 build
+# to keep the build reproducible.
+FROM jupyter/base-notebook:ubuntu-20.04
+
+# Notebook user and home directory.
+# NB_USER / NB_UID are build arguments, so they can be overridden with
+# `--build-arg` at build time.
+# NOTE(review): nothing in this file creates NB_USER, so the account presumably
+# has to exist in the base image already for the later `USER ${USER}` switch to
+# work - confirm the default matches the base image's user.
+ARG NB_USER=fsdocs-user
+ARG NB_UID=1000
+
+# Persist the values into the image environment. The key=value form is used
+# because the space-separated `ENV key value` form is legacy syntax.
+ENV USER=${NB_USER}
+ENV NB_UID=${NB_UID}
+ENV HOME=/home/${NB_USER}
+
+# All following relative paths resolve against the user's home directory.
+WORKDIR ${HOME}
+
+# Package installation needs root.
+USER root
+
+# Install curl, which is used further down to download the .NET SDK.
+# `apt-get update` and `apt-get install` share one layer so that a cached,
+# stale package index is never reused by the install step. The index is removed
+# again in the same layer so it does not end up in the image.
+# ca-certificates is listed explicitly because --no-install-recommends would
+# otherwise skip it, and the SDK download uses HTTPS.
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        ca-certificates \
+        curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# .NET runtime and tooling switches:
+#   DOTNET_RUNNING_IN_CONTAINER             - enables detection of running in a container
+#   DOTNET_USE_POLLING_FILE_WATCHER         - the only `dotnet watch` mode supported in a container
+#   NUGET_XMLDOC_MODE                       - `skip` avoids extracting XML docs (rarely useful in an image, helps performance)
+#   DOTNET_INTERACTIVE_CLI_TELEMETRY_OPTOUT - telemetry stays off until jupyter is installed during the
+#                                             build, which prevents caching of the machine id
+ENV DOTNET_RUNNING_IN_CONTAINER=true \
+    DOTNET_USE_POLLING_FILE_WATCHER=true \
+    NUGET_XMLDOC_MODE=skip \
+    DOTNET_INTERACTIVE_CLI_TELEMETRY_OPTOUT=true
+
+# Native libraries required by the .NET CLI.
+# `set -e` aborts the step on the first failing command. The package index
+# downloaded here is deleted again within the same layer.
+RUN set -e; \
+    apt-get update; \
+    DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
+        libc6 \
+        libgcc1 \
+        libgssapi-krb5-2 \
+        libicu66 \
+        libssl1.1 \
+        libstdc++6 \
+        zlib1g; \
+    rm -rf /var/lib/apt/lists/*
+
+# Install .NET Core SDK
+
+# The SDK version is declared once here and reused by the download step below,
+# so the environment variable and the downloaded archive cannot drift apart.
+# When updating the SDK version, the sha512 value a few lines down must also be updated.
+ENV DOTNET_SDK_VERSION=5.0.101
+
+# Download the SDK archive, verify it against the pinned SHA-512 digest, unpack
+# it into /usr/share/dotnet and expose the `dotnet` driver through a symlink in
+# /usr/bin. `--fail` makes curl exit non-zero on an HTTP error instead of saving
+# the error page as dotnet.tar.gz.
+# NOTE(review): confirm the download host below is still being served.
+RUN curl --fail -SL --output dotnet.tar.gz "https://dotnetcli.azureedge.net/dotnet/Sdk/${DOTNET_SDK_VERSION}/dotnet-sdk-${DOTNET_SDK_VERSION}-linux-x64.tar.gz" \
+    && dotnet_sha512='398d88099d765b8f5b920a3a2607c2d2d8a946786c1a3e51e73af1e663f0ee770b2b624a630b1bec1ceed43628ea8bc97963ba6c870d42bec064bde1cd1c9edb' \
+    && echo "$dotnet_sha512 dotnet.tar.gz" | sha512sum -c - \
+    && mkdir -p /usr/share/dotnet \
+    && tar -ozxf dotnet.tar.gz -C /usr/share/dotnet \
+    && rm dotnet.tar.gz \
+    && ln -s /usr/share/dotnet/dotnet /usr/bin/dotnet \
+    # Trigger first run experience by running arbitrary cmd
+    && dotnet help
+
+# Put the whole build context into the notebooks folder.
+COPY . ${HOME}/notebooks/
+
+# Place the NuGet package-source configuration in the home directory.
+COPY NuGet.config ${HOME}/nuget.config
+
+# Still running as root at this point: hand the home directory (including the
+# files copied above) to the notebook UID, then switch to the notebook user for
+# the remaining steps.
+RUN chown -R "${NB_UID}" "${HOME}"
+USER ${USER}
+
+# Install nteract. `--no-cache-dir` keeps pip's download cache out of the image layer.
+RUN pip install --no-cache-dir nteract_on_jupyter
+
+# Install the latest build from the master branch of Microsoft.DotNet.Interactive
+# as a global tool, using the dotnet-tools package feed.
+RUN dotnet tool install \
+        --global Microsoft.dotnet-interactive \
+        --add-source "https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json"
+
+# Alternative: latest stable release from nuget.org
+#RUN dotnet tool install -g Microsoft.dotnet-interactive --add-source "https://api.nuget.org/v3/index.json"
+
+# Make globally installed dotnet tools resolvable, and print the resulting
+# PATH into the build log.
+ENV PATH=${PATH}:${HOME}/.dotnet/tools
+RUN echo "${PATH}"
+
+# Register the dotnet-interactive kernel specs with Jupyter.
+RUN dotnet interactive jupyter install
+
+# Jupyter is installed for the image now, so telemetry is switched back on.
+ENV DOTNET_INTERACTIVE_CLI_TELEMETRY_OPTOUT=false
+
+# Use the notebooks folder as the working directory.
+WORKDIR ${HOME}/notebooks/
\ No newline at end of file
diff --git a/NuGet.config b/NuGet.config
new file mode 100644
index 0000000..cf1ace5
--- /dev/null
+++ b/NuGet.config
@@ -0,0 +1,14 @@
+
+
The BioProviders package provides tools and functionality to simplify accessing and manipulating bioinformatic data. +The .NET Bio and BioFSharp libraries +are used to parse and format the data provided by this package.
+BioProviders is available through NuGet. +
+BioProviders implements Type Providers for accessing bioinformatic datasets. These Type Providers allow remote access +to data sources (e.g., GenBank) and type-safe representations of their data (e.g., GenBank Flat File).
+GenBankProvider<..>
type.RefSeqProvider<..>
type.The project is hosted on GitHub where you can +report issues, fork the project and submit pull requests.
+The library is available under the OSI-approved MIT license. For more information see the +License file in the GitHub repository.
+ + +This article describes how to use the GenBank Type Provider to remotely access genomic data stored in the +GenBank database. This Type Provider collects and parses the genomic data +for a specified organism and generates a static type containing its metadata and sequence.
+The GenBank Type Provider uses .NET Bio to parse the GenBank data files +and BioFSharp to provide utilities for manipulating genomic sequences.
+To load the GenBank Type Provider, a script can use the NuGet syntax to reference the BioProviders package, shown below.
+You can optionally include the BioFSharp package. While it's not required to use the basic BioProviders functions, it can be used to explore the metadata of the provided types, as shown in a later example.
+#r "nuget: BioProviders"
+#r "nuget: BioFSharp"
+
+If creating an F# library or application, BioProviders can be added as a package reference. You can use your IDE for this, or use the dotnet add package BioProviders
command in your project folder from the command line.
BioProviders can then be used in your script or code by using an open command. Opening its dependencies should not be required. (BioFSharp is loaded for future examples.)
+open BioProviders
+open BioFSharp
+
+The GenBank Type Provider will be demonstrated for this GenBank assembly +of the Candidatus Carsonella ruddii species. To create a typed representation of the assembly, two pieces of information +must be given to the Type Provider:
+For this example, the species name is "Candidatus Carsonella ruddii" and the GenBank assembly accession is "GCA_001274515.1". +To find this information:
+You can then select the assembly's GenBank (as well as RefSeq) accession from the list that appears.
+ +Passing this information to the Type Provider generates the Assembly Type. The genomic data can then be extracted from the +Assembly Type by invoking the Genome method. This is demonstrated below.
+// Define species name and GenBank assembly accession.
+let [<Literal>] Species = "Candidatus Carsonella ruddii"
+let [<Literal>] Accession = "GCA_001274515.1"
+
+// Create GenBank assembly type.
+type Ruddii = GenBankProvider<Species, Accession>
+
+// Extract statically-typed genome data.
+let genome = Ruddii.Genome()
+
+Each genome is accompanied by metadata describing the organism and sequence recorded in the assembly. This metadata can +be extracted using the Metadata field of the Genome Type created previously. The Metadata type is largely based on that +provided by .NET Bio, with modifications +made to be more idiomatic with F#.
+Below is an example of how the raw metadata type can be retrieved and displayed:
+// Extract the metadata.
+let metadata = genome.Metadata
+
+// Display the metadata type.
+printf "%A" metadata
+
+
|
The metadata type consists of many fields, though not all fields of the metadata exist for all assemblies. Therefore, they are provided as option types, on which a match expression can be used. Below are examples of accessing fields from the example assembly.
+
+
+✅ Example - Accessing a field that is provided.
// Print definition if exists.
+match metadata.Definition with
+| Some definition -> printf "%s" definition
+| None -> printf "No definition provided."
+
+
|
+❌ Example - Accessing a field that is not provided.
// Print database source if exists.
+match metadata.DbSource with
+| Some dbsource -> printf "%s" dbsource
+| None -> printf "No database source provided."
+
+
|
The genomic sequence for the organism can be extracted using the Sequence field of the Genome Type created previously. +This field provides a BioFSharp BioSeq containing +a series of Nucleotides. More +can be read about BioFSharp containers here.
+An example of accessing and manipulating the GenBankProvider genomic sequence using BioFSharp is provided below:
+// Extract the BioFSharp BioSeq.
+let sequence = genome.Sequence
+
+// Display the sequence type.
+printf "%A" sequence
+
+
|
// Take the complement, then transcribe and translate the coding strand.
+sequence
+|> BioSeq.complement
+|> BioSeq.transcribeCodingStrand
+|> BioSeq.translate 0
+
+
|
Wildcard operators are supported in both the Species and Accession provided to the GenBankProvider. By using asterisks "*" +at the end of a Species or Accession name, species or accessions starting with the provided pattern will be matched.
+For example, we can get all Staphylococcus species starting with the letter 'c' and assembly accessions starting with +'GCA_01':
+// Define species name and GenBank assembly accession using wildcards.
+let [<Literal>] SpeciesPattern = "Staphylococcus c*"
+let [<Literal>] AccessionPattern = "GCA_01*"
+
+// Create GenBank type containing all species matching the species pattern.
+type SpeciesCollection = GenBankProvider<SpeciesPattern, AccessionPattern>
+
+// Select the species types.
+type Capitis = SpeciesCollection.``Staphylococcus capitis``
+type Cohnii = SpeciesCollection.``Staphylococcus cohnii``
+
+// Select assemblies.
+type Assembly1 = Capitis.``GCA_012926605.1``
+type Assembly2 = Capitis.``GCA_015645205.1``
+type Assembly3 = Cohnii.``GCA_013349225.1``
+type Assembly4 = Cohnii.``GCA_014884245.1``
+
+// Extract statically-typed genome data.
+let data = Assembly1.Genome()
+
+// Show the assembly's definition.
+match data.Metadata.Definition with
+| Some definition -> printf "%s" definition
+| None -> printf "No definition provided."
+
+
|
The Accession parameter can also be omitted from the GenBankProvider. In this case, all assemblies for the given species will +be matched. For example:
+// Define species name.
+let [<Literal>] SpeciesName = "Staphylococcus lugdunensis"
+
+// Create GenBank type containing all assemblies for the species.
+type Assemblies = GenBankProvider<SpeciesName>
+
+// Select assemblies.
+type Assembly = Assemblies.``GCA_001546615.1``
+
+// Show the assembly's primary accession.
+match (Assembly.Genome()).Metadata.Accession with
+| Some accession -> match accession.Primary with
+ | Some primary -> printf "%s" primary
+ | None -> printf "No primary accession provided."
+| None -> printf "No accession provided."
+
+
|
This article describes how to use the RefSeq Type Provider to remotely access genomic data stored in the +RefSeq database. This Type Provider collects and parses the genomic data +for a specified organism and generates a static type containing its metadata and sequence.
+The RefSeq Type Provider uses .NET Bio to parse the RefSeq data files +and BioFSharp to provide utilities for manipulating genomic sequences.
+To load the RefSeq Type Provider, a script can use the NuGet syntax to reference the BioProviders package, shown below.
+You can optionally include the BioFSharp package. While it's not required to use the basic BioProviders functions, it can be used to explore the metadata of the provided types, as shown in a later example.
+#r "nuget: BioProviders"
+#r "nuget: BioFSharp"
+
+If creating an F# library or application, BioProviders can be added as a package reference. You can use your IDE for this, or use the dotnet add package BioProviders
command in your project folder from the command line.
BioProviders can then be used in your script or code by using an open command. Opening its dependencies should not be required. (BioFSharp is loaded for future examples.)
+open BioProviders
+open BioFSharp
+
+The RefSeq Type Provider will be demonstrated for this RefSeq assembly +of the Staphylococcus borealis species. To create a typed representation of the assembly, two pieces of information +must be given to the Type Provider:
+For this example, the species name is "Staphylococcus borealis" and the RefSeq assembly accession is "GCF_001224225.1". +To find this information:
+You can then select the assembly's RefSeq (as well as GenBank) accession from the list that appears.
+ +Passing this information to the Type Provider generates the Assembly Type. The genomic data can then be extracted from the +Assembly Type by invoking the Genome method. This is demonstrated below.
+// Define species name and RefSeq assembly accession.
+let [<Literal>] Species = "Staphylococcus borealis"
+let [<Literal>] Accession = "GCF_001224225.1"
+
+// Create RefSeq assembly type.
+type Borealis = RefSeqProvider<Species, Accession>
+
+// Extract statically-typed genome data.
+let genome = Borealis.Genome()
+
+Each genome is accompanied by metadata describing the organism and sequence recorded in the assembly. This metadata can +be extracted using the Metadata field of the Genome Type created previously. The Metadata type is largely based on that +provided by .NET Bio, with modifications +made to be more idiomatic with F#.
+Below is an example of how the raw metadata type can be retrieved and displayed:
+// Extract the metadata.
+let metadata = genome.Metadata
+
+// Display the metadata type.
+printf "%A" metadata
+
+
|
The metadata type consists of many fields, though not all fields of the metadata exist for all assemblies. Therefore, they are provided as option types, on which a match expression can be used. Below are examples of accessing fields from the example assembly.
+
+
+✅ Example - Accessing a field that is provided.
// Print definition if exists.
+match metadata.Definition with
+| Some definition -> printf "%s" definition
+| None -> printf "No definition provided."
+
+
|
+❌ Example - Accessing a field that is not provided.
// Print database source if exists.
+match metadata.DbSource with
+| Some dbsource -> printf "%s" dbsource
+| None -> printf "No database source provided."
+
+
|
The genomic sequence for the organism can be extracted using the Sequence field of the Genome Type created previously. +This field provides a BioFSharp BioSeq containing +a series of Nucleotides. More +can be read about BioFSharp containers here.
+An example of accessing and manipulating the RefSeqProvider genomic sequence using BioFSharp is provided below:
+// Extract the BioFSharp BioSeq.
+let sequence = genome.Sequence
+
+// Display the sequence type.
+printf "%A" sequence
+
+
|
// Take the complement, then transcribe and translate the coding strand.
+sequence
+|> BioSeq.complement
+|> BioSeq.transcribeCodingStrand
+|> BioSeq.translate 0
+
+
|
Wildcard operators are supported in both the Species and Accession provided to the RefSeqProvider. By using asterisks "*" +at the end of a Species or Accession name, species or accessions starting with the provided pattern will be matched.
+For example, we can get all Staphylococcus species starting with the letter 'c' and assembly accessions starting with +'GCF_01':
+// Define species name and RefSeq assembly accession using wildcards.
+let [<Literal>] SpeciesPattern = "Staphylococcus c*"
+let [<Literal>] AccessionPattern = "GCF_01*"
+
+// Create RefSeq type containing all species matching the species pattern.
+type SpeciesCollection = RefSeqProvider<SpeciesPattern, AccessionPattern>
+
+// Select the species types.
+type Capitis = SpeciesCollection.``Staphylococcus capitis``
+type Cohnii = SpeciesCollection.``Staphylococcus cohnii``
+
+// Select assemblies.
+type Assembly1 = Capitis.``GCF_012926605.1``
+type Assembly2 = Capitis.``GCF_012926635.1``
+type Assembly3 = Cohnii.``GCF_013602215.1``
+type Assembly4 = Cohnii.``GCF_013602265.1``
+
+// Extract statically-typed genome data.
+let data = Assembly1.Genome()
+
+// Show the assembly's definition.
+match data.Metadata.Definition with
+| Some definition -> printf "%s" definition
+| None -> printf "No definition provided."
+
+
|
The Accession parameter can also be omitted from the RefSeqProvider. In this case, all assemblies for the given species will +be matched. For example:
+// Define species name.
+let [<Literal>] SpeciesName = "Staphylococcus lugdunensis"
+
+// Create RefSeq type containing all assemblies for the species.
+type Assemblies = RefSeqProvider<SpeciesName>
+
+// Select assemblies.
+type Assembly = Assemblies.``GCF_001546615.1``
+
+// Show the assembly's primary accession.
+match (Assembly.Genome()).Metadata.Accession with
+| Some accession -> match accession.Primary with
+ | Some primary -> printf "%s" primary
+ | None -> printf "No primary accession provided."
+| None -> printf "No accession provided."
+
+
|
+ Namespace + | ++ Description + | +