diff --git a/.gitignore b/.gitignore
index 6e3bdf9..290f917 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,4 +12,6 @@ pyplink/version.py
.coveragerc
htmlcov
+.ipynb_checkpoints
+
build
diff --git a/.travis.yml b/.travis.yml
index e335212..6da901f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,4 +3,5 @@ python:
- "2.7"
- "3.3"
- "3.4"
+ - "3.5"
script: "python setup.py test"
diff --git a/README.mkd b/README.mkd
index 80b6197..963532e 100644
--- a/README.mkd
+++ b/README.mkd
@@ -2,9 +2,9 @@
[![PyPI version](https://badge.fury.io/py/pyplink.svg)](http://badge.fury.io/py/pyplink)
-# pyplink - Module to read binary files from Plink
+# pyplink - Module to process Plink's binary files
-`PyPlink` is a Python module to read binary Plink files.
+`PyPlink` is a Python module to read and write Plink's binary files.
## Dependencies
@@ -13,7 +13,8 @@ The tool requires a standard [Python](http://python.org/) installation (2.7 or
3.4) with the following modules:
1. [numpy](http://www.numpy.org/) version 1.8.2 or latest
-1. [pandas](http://pandas.pydata.org/) version 0.14.1 or latest
+2. [pandas](http://pandas.pydata.org/) version 0.14.1 or latest
+3. [six](https://pythonhosted.org/six/) version 1.9.0 or latest
The tool has been tested on *Linux* only, but should work on *MacOS* and
*Windows* operating systems as well.
@@ -34,8 +35,8 @@ conda install pyplink -c http://statgen.org/wp-content/uploads/Softwares/pyplink
```
It is possible to add the channel to conda's configuration, so that the
-`-c http://statgen.org/...` can be omitted to update or install. Only perform
-the following command:
+`-c http://statgen.org/...` can be omitted to update or install the package.
+To add the channel, perform the following command:
```bash
conda config --add channels http://statgen.org/wp-content/uploads/Softwares/pyplink
@@ -61,146 +62,6 @@ conda update pyplink -c http://statgen.org/wp-content/uploads/Softwares/pyplink
```
-## Example
-
-This example describe how to work with the `pyplink` module.
-
-
-### Data description
-
-```python
->>> from pyplink import PyPlink
->>> pedfile = PyPlink("prefix")
->>> pedfile.get_nb_samples()
-10656
->>> pedfile.get_nb_markers()
-141701
->>> samples = pedfile.get_fam()
->>> samples.head()
- fid iid father mother gender status
-0 Sample_1 Sample_1 0 0 2 -9
-1 Sample_2 Sample_2 0 0 1 -9
-2 Sample_3 Sample_3 0 0 2 -9
-3 Sample_4 Sample_4 0 0 1 -9
-4 Sample_5 Sample_5 0 0 2 -9
->>> all_markers = pedfile.get_bim()
->>> all_markers.head()
- chrom pos a1 a2
-snp
-exm2268640 1 762320 A G
-exm47 1 865628 A G
-exm53 1 865665 A G
-exm55 1 865694 A G
-exm56 1 865700 A G
-```
-
-
-### Iterating over all markers
-
-Cycling through genotypes as `-1`, `0`, `1` and `2` values, where `-1` is
-unknown, `0` is homozygous (major allele), `1` is heterozygous and `2` is
-homozygous (minor allele).
-
-```python
->>> for marker_id, genotypes in pedfile:
-... print(marker_id, genotypes)
-... break
-...
-exm2268640 [0 0 0 ..., 0 0 0]
->>> for marker_id, genotypes in pedfile.iter_geno():
-... print(marker_id, genotypes)
-... break
-...
-exm2268640 [0 0 0 ..., 0 0 0]
-```
-
-Cycling through genotypes as `A`, `C`, `G`, `T` values.
-
-```python
->>> for marker_id, genotypes in pedfile.iter_acgt_geno():
-... print(marker_id, genotypes)
-... break
-...
-exm2268640 ['GG' 'GG' 'GG' ..., 'GG' 'GG' 'GG']
-```
-
-
-### Iterating over selected markers
-
-Cycling through genotypes as `-1`, `0`, `1` and `2` values.
-
-```python
->>> markers = ["exm47", "exm2253575", "exm269"]
->>> for marker_id, genotypes in pedfile.iter_geno_marker(markers):
-... print(marker_id, genotypes)
-...
-exm47 [0 0 0 ..., 0 0 0]
-exm2253575 [1 1 1 ..., 0 0 2]
-exm269 [0 0 0 ..., 0 1 0]
-```
-
-Cycling through genotypes as `A`, `C`, `G`, `T` values.
-
-```python
->>> for marker_id, genotypes in pedfile.iter_acgt_geno_marker(markers):
-... print(marker_id, genotypes)
-...
-exm47 ['GG' 'GG' 'GG' ..., 'GG' 'GG' 'GG']
-exm2253575 ['GA' 'GA' 'GA' ..., 'AA' 'AA' 'GG']
-exm269 ['GG' 'GG' 'GG' ..., 'GG' 'AG' 'GG']
-```
-
-
-### Getting a single marker
-
-To get the genotypes (as `-1`, `0`, `1` and `2` values) of a single marker:
-```python
->>> pedfile.get_geno_marker("exm47")
-[0 0 0 ..., 0 0 0]
-```
-
-To get the genotypes (as `A`, `C`, `G`, `T` values) of a single marker:
-```python
->>> pedfile.get_acgt_geno_marker("exm47")
-['GG' 'GG' 'GG' ..., 'GG' 'GG' 'GG']
-```
-
-
-### Misc example
-
-To get all markers on the Y chromosomes for the males.
-
-```python
->>> y_markers = all_markers[all_markers.chrom == 23].index.values
->>> males = samples.gender == 1
->>> for marker_id, genotypes in pedfile.iter_geno_marker(y_markers):
-... male_genotypes = genotypes[males.values]
-... print("{:,d} total genotypes".format(len(genotypes)))
-... print("{:,d} genotypes for {:,d} "
-... "males".format(len(male_genotypes), males.sum()))
-... break
-...
-10,656 total genotypes
-6,297 genotypes for 6,297 males
-```
-
-To count the minor allele frequency.
-
-```python
->>> from collections import Counter
->>> markers = ["exm47", "exm2253575", "exm269"]
->>> for marker_id, genotypes in pedfile.iter_geno_marker(markers):
-... geno_counter = Counter(genotypes)
-... nb_alleles = sum(geno_counter[i] * 2 for i in range(3))
-... maf = (geno_counter[2] * 2 + geno_counter[1]) / nb_alleles
-... print(marker_id, maf)
-...
-exm47 0.0070389488503050214
-exm2253575 0.3461719116956318
-exm269 0.05987799155326138
-```
-
-
## Testing
To test the module, just perform the following command:
@@ -208,9 +69,16 @@ To test the module, just perform the following command:
```python
>>> import pyplink
>>> pyplink.test()
-.................
+.......................
----------------------------------------------------------------------
-Ran 17 tests in 0.060s
+Ran 23 tests in 0.149s
OK
```
+
+
+## Example
+
+The following
+[notebook](http://nbviewer.ipython.org/github/lemieuxl/pyplink/blob/binary_write/demo/PyPlink%20Demo.ipynb)
+contains a demonstration (for both Python 2 and 3) of the `PyPlink` module.
diff --git a/conda_build.sh b/conda_build.sh
new file mode 100755
index 0000000..bf5d9c3
--- /dev/null
+++ b/conda_build.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+# Creating a directory for the skeleton
+mkdir -p skeleton
+pushd skeleton
+
+# Creating the skeleton
+conda skeleton pypi pyplink
+
+# The different python versions and platforms
+python_versions="2.7 3.3 3.4 3.5"
+platforms="linux-32 linux-64 osx-64 win-32 win-64"
+
+# Building
+for python_version in $python_versions
+do
+ # Building
+ conda build --python $python_version pyplink &> log.txt
+ filename=$(egrep "^# [$] binstar upload \S+$" log.txt | cut -d " " -f 5)
+
+ # Converting
+ for platform in $platforms
+ do
+ conda convert -p $platform $filename -o ../conda_dist
+ done
+done
+
+popd
+rm -rf skeleton
+
+# Indexing
+pushd conda_dist
+conda index *
+popd
diff --git a/demo/PyPlink Demo.ipynb b/demo/PyPlink Demo.ipynb
new file mode 100644
index 0000000..e1b9694
--- /dev/null
+++ b/demo/PyPlink Demo.ipynb
@@ -0,0 +1,1231 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "from __future__ import print_function, division"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# `PyPlink`\n",
+ "\n",
+ "`PyPlink` is a Python module to read and write binary Plink files. Here are small examples for `PyPlink`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "from pyplink import PyPlink"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Table of contents\n",
+ "\n",
+ "* [**Reading binary pedfile**](#Reading-binary-pedfile)\n",
+ " * [Getting the demo data](#Getting-the-demo-data)\n",
+ " * [Reading the binary file](#Reading-the-binary-file)\n",
+ " * [Getting dataset information](#Getting-dataset-information)\n",
+ " * [Iterating over all markers](#Iterating-over-all-markers)\n",
+ " * [*Additive format*](#iterating_over_all_additive)\n",
+ " * [*Nucleotide format*](#iterating_over_all_nuc)\n",
+ " * [Iterating over selected markers](#Iterating-over-selected-markers)\n",
+ " * [*Additive format*](#iterating_over_selected_additive)\n",
+ " * [*Nucleotide format*](#iterating_over_selected_nuc)\n",
+ " * [Extracting a single marker](#Extracting-a-single-marker)\n",
+ " * [*Additive format*](#extracting_additive)\n",
+ " * [*Nucleotide format*](#extracting_nuc)\n",
+ " * [Misc example](#Misc-example)\n",
+ " * [*Extracting a subset of markers and samples*](#Extracting-a-subset-of-markers-and-samples)\n",
+ " * [*Counting the allele frequency of markers*](#Counting-the-allele-frequency-of-markers)\n",
+ "\n",
+ "\n",
+ "* [**Writing binary pedfile**](#Writing-binary-pedfile)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Reading binary pedfile\n",
+ "\n",
+ "### Getting the demo data\n",
+ "\n",
+ "The [`Plink`](http://pngu.mgh.harvard.edu/~purcell/plink/) softwares provides a testing dataset on the [resources page](http://pngu.mgh.harvard.edu/~purcell/plink/res.shtml). It contains the 270 samples from the HapMap project (release 23) on build GRCh36/hg18."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "import zipfile\n",
+ "try:\n",
+ " from urllib.request import urlretrieve\n",
+ "except ImportError:\n",
+ " from urllib import urlretrieve\n",
+ "\n",
+ "# Downloading the demo data from Plink webset\n",
+ "urlretrieve(\n",
+ " \"http://pngu.mgh.harvard.edu/~purcell/plink/dist/hapmap_r23a.zip\",\n",
+ " \"hapmap_r23a.zip\",\n",
+ ")\n",
+ "\n",
+ "# Extracting the archive content\n",
+ "with zipfile.ZipFile(\"hapmap_r23a.zip\", \"r\") as z:\n",
+ " z.extractall(\".\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Reading the binary file\n",
+ "\n",
+ "To read a binary file, `PyPlink` only requires the prefix of the files."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "pedfile = PyPlink(\"hapmap_r23a\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Getting dataset information"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "270 samples and 4,098,136 markers\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"{:,d} samples and {:,d} markers\".format(\n",
+ " pedfile.get_nb_samples(),\n",
+ " pedfile.get_nb_markers(),\n",
+ "))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " fid | \n",
+ " iid | \n",
+ " father | \n",
+ " mother | \n",
+ " gender | \n",
+ " status | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " NA18524 | \n",
+ " NA18524 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " -9 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " NA18526 | \n",
+ " NA18526 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " -9 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " NA18529 | \n",
+ " NA18529 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " -9 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " NA18532 | \n",
+ " NA18532 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " -9 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " NA18537 | \n",
+ " NA18537 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " -9 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " fid iid father mother gender status\n",
+ "0 NA18524 NA18524 0 0 1 -9\n",
+ "1 NA18526 NA18526 0 0 2 -9\n",
+ "2 NA18529 NA18529 0 0 2 -9\n",
+ "3 NA18532 NA18532 0 0 2 -9\n",
+ "4 NA18537 NA18537 0 0 2 -9"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "all_samples = pedfile.get_fam()\n",
+ "all_samples.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "collapsed": false,
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " chrom | \n",
+ " pos | \n",
+ " cm | \n",
+ " a1 | \n",
+ " a2 | \n",
+ "
\n",
+ " \n",
+ " snp | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " rs10399749 | \n",
+ " 1 | \n",
+ " 45162 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " C | \n",
+ "
\n",
+ " \n",
+ " rs2949420 | \n",
+ " 1 | \n",
+ " 45257 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " T | \n",
+ "
\n",
+ " \n",
+ " rs4030303 | \n",
+ " 1 | \n",
+ " 72434 | \n",
+ " 0 | \n",
+ " A | \n",
+ " G | \n",
+ "
\n",
+ " \n",
+ " rs4030300 | \n",
+ " 1 | \n",
+ " 72515 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " C | \n",
+ "
\n",
+ " \n",
+ " rs3855952 | \n",
+ " 1 | \n",
+ " 77689 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " chrom pos cm a1 a2\n",
+ "snp \n",
+ "rs10399749 1 45162 0 0 C\n",
+ "rs2949420 1 45257 0 0 T\n",
+ "rs4030303 1 72434 0 A G\n",
+ "rs4030300 1 72515 0 0 C\n",
+ "rs3855952 1 77689 0 0 A"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "all_markers = pedfile.get_bim()\n",
+ "all_markers.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Iterating over all markers\n",
+ "\n",
+ "\n",
+ "#### Additive format\n",
+ "\n",
+ "Cycling through genotypes as `-1`, `0`, `1` and `2` values, where `-1` is unknown, `0` is homozygous (major allele), `1` is heterozygous, and `2` is homozygous (minor allele)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "rs10399749\n",
+ "[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0\n",
+ " 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 -1\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 -1 0 0 0\n",
+ " 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 -1 0 0 0 0 0 0 0\n",
+ " -1 0 -1 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 -1 0 -1 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n"
+ ]
+ }
+ ],
+ "source": [
+ "for marker_id, genotypes in pedfile:\n",
+ " print(marker_id)\n",
+ " print(genotypes)\n",
+ " break"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "rs10399749\n",
+ "[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0\n",
+ " 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 -1\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 -1 0 0 0\n",
+ " 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 -1 0 0 0 0 0 0 0\n",
+ " -1 0 -1 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 -1 0 -1 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n"
+ ]
+ }
+ ],
+ "source": [
+ "for marker_id, genotypes in pedfile.iter_geno():\n",
+ " print(marker_id)\n",
+ " print(genotypes)\n",
+ " break"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "#### Nucleotide format\n",
+ "\n",
+ "Cycling through genotypes as `A`, `C`, `G` and `T` values (where `00` is unknown)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "collapsed": false,
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "rs10399749\n",
+ "['CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC'\n",
+ " 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC'\n",
+ " 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' '00' 'CC' 'CC'\n",
+ " 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' '00' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC'\n",
+ " 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC'\n",
+ " 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC'\n",
+ " 'CC' '00' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' '00' 'CC' 'CC' 'CC' 'CC' 'CC'\n",
+ " 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC'\n",
+ " 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' '00' 'CC' 'CC'\n",
+ " 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC'\n",
+ " 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' '00'\n",
+ " 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' '00' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' '00' 'CC'\n",
+ " 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC'\n",
+ " 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC'\n",
+ " 'CC' 'CC' 'CC' 'CC' 'CC' '00' 'CC' '00' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC'\n",
+ " '00' 'CC' '00' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' '00' 'CC' 'CC'\n",
+ " 'CC' 'CC' 'CC' '00' 'CC' '00' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC'\n",
+ " 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC' 'CC']\n"
+ ]
+ }
+ ],
+ "source": [
+ "for marker_id, genotypes in pedfile.iter_acgt_geno():\n",
+ " print(marker_id)\n",
+ " print(genotypes)\n",
+ " break"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Iterating over selected markers\n",
+ "\n",
+ "\n",
+ "#### Additive format\n",
+ "\n",
+ "Cycling through genotypes as `-1`, `0`, `1` and `2` values, where `-1` is unknown, `0` is homozygous (major allele), `1` is heterozygous, and `2` is homozygous (minor allele)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "collapsed": false,
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "rs7092431\n",
+ "[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n",
+ "\n",
+ "rs9943770\n",
+ "[ 0 0 0 2 0 2 0 1 1 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0\n",
+ " 1 0 1 0 0 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1\n",
+ " 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0\n",
+ " 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 1 -1 0 0 0 1 2 1 1 0 1 1 2 0 0 0 1 0 0 0 0 1 1 0 2\n",
+ " 1 1 1 0 1 1 -1 1 0 0 1 0 0 2 0 1 2 0 1 1 1 2 0 1 0\n",
+ " 0 0 0 0 0 0 2 1 2 1 1 2 1 1 1 1 0 1 1 2 1 0 -1 0 1\n",
+ " 1 1 0 1 0 1 -1 2 1 0 0 0 1 1 1 2 0 0 1 1]\n",
+ "\n",
+ "rs1587483\n",
+ "[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 1 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "markers = [\"rs7092431\", \"rs9943770\", \"rs1587483\"]\n",
+ "for marker_id, genotypes in pedfile.iter_geno_marker(markers):\n",
+ " print(marker_id)\n",
+ " print(genotypes, end=\"\\n\\n\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "#### Nucleotide format\n",
+ "\n",
+ "Cycling through genotypes as `A`, `C`, `G` and `T` values (where `00` is unknown)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "collapsed": false,
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "rs7092431\n",
+ "['GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' '00'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG']\n",
+ "\n",
+ "rs9943770\n",
+ "['AA' 'AA' 'AA' 'GG' 'AA' 'GG' 'AA' 'GA' 'GA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA'\n",
+ " 'AA' 'GA' 'AA' 'AA' 'GA' 'AA' 'AA' 'GA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA'\n",
+ " 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'GA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA'\n",
+ " 'GA' 'AA' 'GA' 'AA' 'AA' 'GA' 'AA' 'GA' 'AA' 'AA' 'GA' 'GA' 'GA' 'AA' 'AA'\n",
+ " 'AA' 'AA' 'AA' 'AA' 'AA' 'GA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'GA'\n",
+ " 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'GA' 'AA' 'AA' 'GA' 'AA' 'AA' 'AA'\n",
+ " 'AA' 'GA' 'GA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA'\n",
+ " 'AA' 'AA' 'AA' 'AA' 'AA' 'GA' 'AA' 'AA' 'GA' 'AA' 'AA' 'AA' 'GA' 'AA' 'AA'\n",
+ " 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'GA' 'GA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA'\n",
+ " 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'GA' 'AA' 'AA' 'GA' 'AA' 'AA' 'AA' 'AA'\n",
+ " 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'GA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA'\n",
+ " 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'GA' '00' 'AA' 'AA' 'AA'\n",
+ " 'GA' 'GG' 'GA' 'GA' 'AA' 'GA' 'GA' 'GG' 'AA' 'AA' 'AA' 'GA' 'AA' 'AA' 'AA'\n",
+ " 'AA' 'GA' 'GA' 'AA' 'GG' 'GA' 'GA' 'GA' 'AA' 'GA' 'GA' '00' 'GA' 'AA' 'AA'\n",
+ " 'GA' 'AA' 'AA' 'GG' 'AA' 'GA' 'GG' 'AA' 'GA' 'GA' 'GA' 'GG' 'AA' 'GA' 'AA'\n",
+ " 'AA' 'AA' 'AA' 'AA' 'AA' 'AA' 'GG' 'GA' 'GG' 'GA' 'GA' 'GG' 'GA' 'GA' 'GA'\n",
+ " 'GA' 'AA' 'GA' 'GA' 'GG' 'GA' 'AA' '00' 'AA' 'GA' 'GA' 'GA' 'AA' 'GA' 'AA'\n",
+ " 'GA' '00' 'GG' 'GA' 'AA' 'AA' 'AA' 'GA' 'GA' 'GA' 'GG' 'AA' 'AA' 'GA' 'GA']\n",
+ "\n",
+ "rs1587483\n",
+ "['GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' '00' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' '00' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'CG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' '00' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG'\n",
+ " 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG' 'GG']\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "markers = [\"rs7092431\", \"rs9943770\", \"rs1587483\"]\n",
+ "for marker_id, genotypes in pedfile.iter_acgt_geno_marker(markers):\n",
+ " print(marker_id)\n",
+ " print(genotypes, end=\"\\n\\n\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Extracting a single marker\n",
+ "\n",
+ "\n",
+ "#### Additive format\n",
+ "\n",
+ "Cycling through genotypes as `-1`, `0`, `1` and `2` values, where `-1` is unknown, `0` is homozygous (major allele), `1` is heterozygous, and `2` is homozygous (minor allele)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([ 0, 0, 0, 0, 0, -1, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0,\n",
+ " 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, -1,\n",
+ " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
+ " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
+ " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0,\n",
+ " 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], dtype=int8)"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pedfile.get_geno_marker(\"rs7619974\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "#### Nucleotide format\n",
+ "\n",
+ "Cycling through genotypes as `A`, `C`, `G` and `T` values (where `00` is unknown)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['CC', 'CC', 'CC', 'CC', 'CC', '00', 'CC', 'CC', '00', '00', 'CC',\n",
+ " 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC',\n",
+ " 'CC', 'CC', '00', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC',\n",
+ " '00', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC',\n",
+ " 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC',\n",
+ " 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC',\n",
+ " 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC',\n",
+ " 'CC', '00', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC', 'CC',\n",
+ " 'CC', 'CC', '00', '00', '00', '00', '00', '00', '00', '00', '00',\n",
+ " '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00',\n",
+ " '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00',\n",
+ " '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00',\n",
+ " '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00',\n",
+ " '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00',\n",
+ " '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00',\n",
+ " '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00',\n",
+ " '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00',\n",
+ " '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00',\n",
+ " '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00',\n",
+ " '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00',\n",
+ " '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00',\n",
+ " '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00',\n",
+ " '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00',\n",
+ " '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00',\n",
+ " '00', '00', '00', '00', '00', '00'], \n",
+ " dtype=' Note that `PyPlink` only writes the `BED` file. The user is required to create the `FAM` and `BIM` files."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# The genotypes for 3 markers and 10 samples\n",
+ "all_genotypes = [\n",
+ " [0, 0, 0, 1, 0, 0, -1, 2, 1, 0],\n",
+ " [0, 0, 1, 1, 0, 0, 0, 1, 2, 0],\n",
+ " [0, 0, 0, 0, 1, 1, 0, 0, 0, 1],\n",
+ "]\n",
+ "\n",
+ "# Writing the BED file using PyPlink\n",
+ "with PyPlink(\"test_output\", \"w\") as pedfile:\n",
+ " for genotypes in all_genotypes:\n",
+ " pedfile.write_marker(genotypes)\n",
+ "\n",
+ "# Writing a dummy FAM file\n",
+ "with open(\"test_output.fam\", \"w\") as fam_file:\n",
+ " for i in range(10):\n",
+ " print(\"family_{}\".format(i+1), \"sample_{}\".format(i+1), \"0\", \"0\", \"0\", \"-9\",\n",
+ " sep=\" \", file=fam_file)\n",
+ "\n",
+ "# Writing a dummy BIM file\n",
+ "with open(\"test_output.bim\", \"w\") as bim_file:\n",
+ " for i in range(3):\n",
+ " print(\"1\", \"marker_{}\".format(i+1), \"0\", i+1, \"A\", \"T\",\n",
+ " sep=\"\\t\", file=bim_file)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Checking the content of the newly created binary files\n",
+ "pedfile = PyPlink(\"test_output\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " fid | \n",
+ " iid | \n",
+ " father | \n",
+ " mother | \n",
+ " gender | \n",
+ " status | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " family_1 | \n",
+ " sample_1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " -9 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " family_2 | \n",
+ " sample_2 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " -9 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " family_3 | \n",
+ " sample_3 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " -9 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " family_4 | \n",
+ " sample_4 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " -9 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " family_5 | \n",
+ " sample_5 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " -9 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " family_6 | \n",
+ " sample_6 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " -9 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " family_7 | \n",
+ " sample_7 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " -9 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " family_8 | \n",
+ " sample_8 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " -9 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " family_9 | \n",
+ " sample_9 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " -9 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " family_10 | \n",
+ " sample_10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " -9 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " fid iid father mother gender status\n",
+ "0 family_1 sample_1 0 0 0 -9\n",
+ "1 family_2 sample_2 0 0 0 -9\n",
+ "2 family_3 sample_3 0 0 0 -9\n",
+ "3 family_4 sample_4 0 0 0 -9\n",
+ "4 family_5 sample_5 0 0 0 -9\n",
+ "5 family_6 sample_6 0 0 0 -9\n",
+ "6 family_7 sample_7 0 0 0 -9\n",
+ "7 family_8 sample_8 0 0 0 -9\n",
+ "8 family_9 sample_9 0 0 0 -9\n",
+ "9 family_10 sample_10 0 0 0 -9"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pedfile.get_fam()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " chrom | \n",
+ " pos | \n",
+ " cm | \n",
+ " a1 | \n",
+ " a2 | \n",
+ "
\n",
+ " \n",
+ " snp | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " marker_1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " A | \n",
+ " T | \n",
+ "
\n",
+ " \n",
+ " marker_2 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " A | \n",
+ " T | \n",
+ "
\n",
+ " \n",
+ " marker_3 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " A | \n",
+ " T | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " chrom pos cm a1 a2\n",
+ "snp \n",
+ "marker_1 1 1 0 A T\n",
+ "marker_2 1 2 0 A T\n",
+ "marker_3 1 3 0 A T"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pedfile.get_bim()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "marker_1 [ 0 0 0 1 0 0 -1 2 1 0]\n",
+ "marker_2 [0 0 1 1 0 0 0 1 2 0]\n",
+ "marker_3 [0 0 0 0 1 1 0 0 0 1]\n"
+ ]
+ }
+ ],
+ "source": [
+ "for marker, genotypes in pedfile:\n",
+ " print(marker, genotypes)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The newly created binary files are compatible with Plink."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "@----------------------------------------------------------@\n",
+ "| PLINK! | v1.07 | 10/Aug/2009 |\n",
+ "|----------------------------------------------------------|\n",
+ "| (C) 2009 Shaun Purcell, GNU General Public License, v2 |\n",
+ "|----------------------------------------------------------|\n",
+ "| For documentation, citation & bug-report instructions: |\n",
+ "| http://pngu.mgh.harvard.edu/purcell/plink/ |\n",
+ "@----------------------------------------------------------@\n",
+ "\n",
+ "Skipping web check... [ --noweb ] \n",
+ "Writing this text to log file [ plink.log ]\n",
+ "Analysis started: Tue Nov 3 11:12:07 2015\n",
+ "\n",
+ "Options in effect:\n",
+ "\t--noweb\n",
+ "\t--bfile test_output\n",
+ "\t--freq\n",
+ "\n",
+ "Reading map (extended format) from [ test_output.bim ] \n",
+ "3 markers to be included from [ test_output.bim ]\n",
+ "Reading pedigree information from [ test_output.fam ] \n",
+ "10 individuals read from [ test_output.fam ] \n",
+ "0 individuals with nonmissing phenotypes\n",
+ "Assuming a disease phenotype (1=unaff, 2=aff, 0=miss)\n",
+ "Missing phenotype value is also -9\n",
+ "0 cases, 0 controls and 10 missing\n",
+ "0 males, 0 females, and 10 of unspecified sex\n",
+ "Warning, found 10 individuals with ambiguous sex codes\n",
+ "These individuals will be set to missing ( or use --allow-no-sex )\n",
+ "Writing list of these individuals to [ plink.nosex ]\n",
+ "Reading genotype bitfile from [ test_output.bed ] \n",
+ "Detected that binary PED file is v1.00 SNP-major mode\n",
+ "Before frequency and genotyping pruning, there are 3 SNPs\n",
+ "10 founders and 0 non-founders found\n",
+ "Writing allele frequencies (founders-only) to [ plink.frq ] \n",
+ "\n",
+ "Analysis finished: Tue Nov 3 11:12:10 2015\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "from subprocess import Popen, PIPE\n",
+ "\n",
+ "# Computing frequencies\n",
+ "proc = Popen([\"plink\", \"--noweb\", \"--bfile\", \"test_output\", \"--freq\"],\n",
+ " stdout=PIPE, stderr=PIPE)\n",
+ "outs, errs = proc.communicate()\n",
+ "print(outs.decode(), end=\"\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {
+ "collapsed": false,
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " CHR SNP A1 A2 MAF NCHROBS\n",
+ " 1 marker_1 A T 0.2222 18\n",
+ " 1 marker_2 A T 0.25 20\n",
+ " 1 marker_3 A T 0.15 20\n"
+ ]
+ }
+ ],
+ "source": [
+ "with open(\"plink.frq\", \"r\") as i_file:\n",
+ " print(i_file.read(), end=\"\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.5.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/pyplink/pyplink.py b/pyplink/pyplink.py
index 16acf5b..be6db36 100644
--- a/pyplink/pyplink.py
+++ b/pyplink/pyplink.py
@@ -27,6 +27,11 @@
import os
+try:
+ from itertools import zip_longest as zip_longest
+except ImportError:
+ from itertools import izip_longest as zip_longest
+
import numpy as np
import pandas as pd
@@ -43,9 +48,10 @@
# The recoding values
_geno_recode = {1: -1, # Unknown genotype
- 2: 1, # Heterozygous genotype
- 0: 2, # Homozygous A1
- 3: 0} # Homozygous A2
+ 2: 1, # Heterozygous genotype
+ 0: 2, # Homozygous A1
+ 3: 0} # Homozygous A2
+_byte_recode = dict(value[::-1] for value in _geno_recode.items())
class PyPlink(object):
@@ -60,40 +66,58 @@ class PyPlink(object):
dtype=np.int8,
)
- def __init__(self, i_prefix):
+ def __init__(self, i_prefix, mode="r"):
"""Initializes a new PyPlink object.
- :param i_prefix: the prefix of the binary Plink files
- :type i_prefix: str
+ Args:
+ i_prefix (str): the prefix of the binary Plink files
+ mode (str): the open mode for the binary Plink file
+ nb_samples (int): the number of samples
- Reads binary Plink files (BED, BIM and FAM) and create the objects.
+ Reads or write binary Plink files (BED, BIM and FAM).
"""
- # These are the name of the input files
+ # The mode
+ self._mode = mode
+
+ # These are the name of the files
self.bed_filename = "{}.bed".format(i_prefix)
self.bim_filename = "{}.bim".format(i_prefix)
self.fam_filename = "{}.fam".format(i_prefix)
- # Checking that all the files exists (otherwise, there's nothing to do)
- for filename in (self.bed_filename, self.bim_filename,
- self.fam_filename):
- if not os.path.isfile(filename):
- raise IOError("No such file: '{}'".format(filename))
+ if self._mode == "r":
+ # Checking that all the files exists (otherwise, there's nothing to
+ # do)
+ for filename in (self.bed_filename, self.bim_filename,
+ self.fam_filename):
+ if not os.path.isfile(filename):
+ raise IOError("No such file: '{}'".format(filename))
+
+ # Setting BIM and FAM to None
+ self._bim = None
+ self._fam = None
+
+ # Reading the input files
+ self._read_bim()
+ self._read_fam()
+ self._read_bed()
- # Setting BIM and FAM to None
- self._bim = None
- self._fam = None
+ # Where we're at
+ self._n = 0
- # Reading the input files
- self._read_bim()
- self._read_fam()
- self._read_bed()
+ elif self._mode == "w":
+ # The dummy number of samples and bytes
+ self._nb_samples = None
- # Where we're at
- self._n = 0
+ # Opening the output BED file
+ self._bed_file = open(self.bed_filename, "wb")
+ self._write_bed_header()
+
+ else:
+ raise ValueError("invalid mode: '{}'".format(self._mode))
def __repr__(self):
- """The __repr__ function."""
+ """The representation of the PyPlink object."""
return "PyPlink({:,d} samples; {:,d} markers)".format(
self.get_nb_samples(),
self.get_nb_markers(),
@@ -107,6 +131,16 @@ def __next__(self):
"""The __next__ function."""
return self.next()
+ def __enter__(self):
+ """Entering the context manager."""
+ return self
+
+ def __exit__(self, *args):
+ """Exiting the context manager."""
+ if self._mode == "w":
+ # Closing the BED file
+ self._bed_file.close()
+
def next(self):
"""The next function."""
if self._n < self._nb_markers:
@@ -129,17 +163,16 @@ def seek(self, n):
def _read_bim(self):
"""Reads the BIM file."""
- # The original BIM columns
- original_bim_cols = ["chrom", "snp", "cm", "pos", "a1", "a2"]
-
# Reading the BIM file and setting the values
- bim = pd.read_csv(self.bim_filename, sep="\t",
- names=original_bim_cols)
+ bim = pd.read_csv(self.bim_filename, delim_whitespace=True,
+ names=["chrom", "snp", "cm", "pos", "a1", "a2"])
- # The 'snp' should always be strings
+ # The 'snp', 'a1' and 'a2' columns should always be strings
bim["snp"] = bim["snp"].astype(str)
+ bim["a1"] = bim["a1"].astype(str)
+ bim["a2"] = bim["a2"].astype(str)
- bim = bim.set_index("snp", drop=False)
+ bim = bim.set_index("snp")
bim["i"] = range(len(bim))
bim[2] = bim.a1 * 2 # Original '0'
bim[1] = bim.a1 + bim.a2 # Original '2'
@@ -168,16 +201,16 @@ def get_nb_markers(self):
def _read_fam(self):
"""Reads the FAM file."""
- # The original FAM columns
- self.original_fam_cols = ["fid", "iid", "father", "mother", "gender",
- "status"]
# Reading the FAM file and setting the values
- fam = pd.read_csv(self.fam_filename, sep=" ",
- names=self.original_fam_cols)
+ fam = pd.read_csv(self.fam_filename, delim_whitespace=True,
+ names=["fid", "iid", "father", "mother", "gender",
+ "status"])
- # 'fid' and 'iid' should always be strings (more logical that way)
+ # The sample IDs should always be strings (more logical that way)
fam["fid"] = fam["fid"].astype(str)
fam["iid"] = fam["iid"].astype(str)
+ fam["father"] = fam["father"].astype(str)
+ fam["mother"] = fam["mother"].astype(str)
fam["byte"] = [
int(np.ceil((1 + 1) / 4.0)) - 1 for i in range(len(fam))
@@ -229,6 +262,11 @@ def _read_bed(self):
# Saving the data in the object
self._bed = data
+ def _write_bed_header(self):
+ """Writes the BED first 3 bytes."""
+ # Writing the first three bytes
+ self._bed_file.write(bytearray((108, 27, 1)))
+
def iter_geno(self):
"""Iterates over genotypes."""
for i in range(len(self._bed)):
@@ -307,3 +345,29 @@ def get_acgt_geno_marker(self, marker):
geno = self._geno_values[self._bed[i]].flatten(order="C")
return self._allele_encoding[i][geno[:self._nb_samples]]
+
+ def write_marker(self, genotypes):
+ """Write genotypes to binary file."""
+ # Initializing the number of samples if required
+ if self._nb_samples is None:
+ self._nb_samples = len(genotypes)
+
+ # Checking the expected number of samples
+ if self._nb_samples != len(genotypes):
+ raise ValueError("{:,d} samples expected, got {:,d}".format(
+ self._nb_samples,
+ len(genotypes),
+ ))
+
+ # Writing to file
+ byte_array = [
+ g[0] | (g[1] << 2) | (g[2] << 4) | (g[3] << 6) for g in
+ self._grouper((_byte_recode[geno] for geno in genotypes), 4)
+ ]
+ self._bed_file.write(bytearray(byte_array))
+
+ @staticmethod
+ def _grouper(iterable, n, fillvalue=0):
+ """Collect data into fixed-length chunks or blocks."""
+ args = [iter(iterable)] * n
+ return zip_longest(fillvalue=fillvalue, *args)
diff --git a/pyplink/tests/test_pyplink.py b/pyplink/tests/test_pyplink.py
index d7a2842..978a7ca 100644
--- a/pyplink/tests/test_pyplink.py
+++ b/pyplink/tests/test_pyplink.py
@@ -23,6 +23,8 @@
# THE SOFTWARE.
+from __future__ import print_function
+
import os
import random
import shutil
@@ -519,7 +521,95 @@ def test_get_acgt_geno_marker(self):
# Asking for an unknown marker should raise an ValueError
with self.assertRaises(ValueError) as cm:
self.pedfile.get_acgt_geno_marker("dummy_marker")
- self.assertEqual(
- "dummy_marker: marker not in BIM",
- str(cm.exception),
- )
+ self.assertEqual("dummy_marker: marker not in BIM", str(cm.exception))
+
+ def test_get_context_read_mode(self):
+ """Tests the PyPlink object as context manager."""
+ with PyPlink(self.prefix) as genotypes:
+ self.assertEqual(3, len(genotypes.get_fam().head(n=3)))
+
+ def test_invalid_mode(self):
+ """Tests invalid mode when PyPlink as context manager."""
+ with self.assertRaises(ValueError) as cm:
+ with PyPlink(self.prefix, "u") as genotypes:
+ pass
+ self.assertEqual("invalid mode: 'u'", str(cm.exception))
+
+ def test_write_binary(self):
+ """Tests writing a Plink binary file."""
+ # The expected genotypes
+ expected_genotypes = [
+ np.array([0, 0, 0, 1, 0, 1, 2], dtype=int),
+ np.array([0, 0, 0, 0, -1, 0, 1], dtype=int),
+ np.array([0, -1, -1, 2, 0, 0, 0], dtype=int),
+ ]
+
+ # The prefix
+ test_prefix = os.path.join(self.tmp_dir, "test_write")
+
+ # Writing the binary file
+ with PyPlink(test_prefix, "w") as plink:
+ for genotypes in expected_genotypes:
+ plink.write_marker(genotypes)
+
+ # Checking the file exists
+ self.assertTrue(os.path.isfile(test_prefix + ".bed"))
+
+ # Writing the FAM file
+ with open(test_prefix + ".fam", "w") as o_file:
+ for i in range(7):
+ print("f{}".format(i+1), "s{}".format(i+1), "0", "0",
+ random.choice((1, 2)), "-9", sep=" ", file=o_file)
+
+ # Writing the BIM file
+ with open(test_prefix + ".bim", "w") as o_file:
+ for i in range(3):
+ print(i+1, "m{}".format(i+1), "0", i+1, "T", "A", sep="\t",
+ file=o_file)
+
+ # Reading the written binary file
+ plink = PyPlink(test_prefix)
+ for i, (marker, genotypes) in enumerate(plink):
+ self.assertEqual("m{}".format(i+1), marker)
+ self.assertTrue((expected_genotypes[i] == genotypes).all())
+
+ def test_write_binary_error(self):
+ """Tests writing a binary file, with different number of sample."""
+ # The expected genotypes
+ expected_genotypes = [
+ np.array([0, 0, 0, 1, 0, 1, 2], dtype=int),
+ np.array([0, 0, 0, 0, -1, 0], dtype=int),
+ np.array([0, -1, -1, 2, 0, 0, 0], dtype=int),
+ ]
+
+ # The prefix
+ test_prefix = os.path.join(self.tmp_dir, "test_write")
+
+ # Writing the binary file
+ with self.assertRaises(ValueError) as cm:
+ with PyPlink(test_prefix, "w") as plink:
+ for genotypes in expected_genotypes:
+ plink.write_marker(genotypes)
+ self.assertEqual("7 samples expected, got 6", str(cm.exception))
+
+ def test_grouper_padding(self):
+ """Tests the _grouper function (when padding is required)."""
+ expected_chunks = [
+ (0, 1, 2),
+ (3, 4, 5),
+ (6, 7, 8),
+ (9, 0, 0),
+ ]
+ observed_chunks = PyPlink._grouper(range(10), 3)
+ for expected, observed in zip(expected_chunks, observed_chunks):
+ self.assertEqual(expected, observed)
+
+ def test_grouper_no_padding(self):
+ """Tests the _grouper function (when padding is not required)."""
+ expected_chunks = [
+ (0, 1, 2, 3, 4),
+ (5, 6, 7, 8, 9),
+ ]
+ observed_chunks = PyPlink._grouper(range(10), 5)
+ for expected, observed in zip(expected_chunks, observed_chunks):
+ self.assertEqual(expected, observed)
diff --git a/setup.py b/setup.py
index 4455fd5..990aef8 100644
--- a/setup.py
+++ b/setup.py
@@ -7,9 +7,7 @@
# - python setup.py bdist_wheel --universal
# How to build for conda (do both with 2.7 and 3.4)
-# - python setup.py bdist_conda
-# - conda convert -p all /PATH/TO/FILE -o conda_dist
-# - cd conda_dist && conda index *
+# - bash conda_build.sh
import os
@@ -17,8 +15,8 @@
MAJOR = 1
-MINOR = 0
-MICRO = 2
+MINOR = 1
+MICRO = 0
VERSION = "{}.{}.{}".format(MAJOR, MINOR, MICRO)