Skip to content

Commit

Permalink
Add dockerfile
Browse files Browse the repository at this point in the history
  • Loading branch information
chenditc committed Jul 20, 2022
1 parent 1ccc9c5 commit ea48f49
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 7 deletions.
5 changes: 5 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Paths excluded from the Docker build context (and therefore from `COPY . /app`).
*.csv
.dolt/
.sqlhistory
*.log
*.out
13 changes: 13 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Image that bundles Dolt, a clone of the chenditc/investment_data database,
# Microsoft qlib and this repository's scripts.
#
# NOTE(review): the base image is untagged (implicit :latest), so rebuilds are
# not reproducible. Pin a tag or digest once the tested release is confirmed.
FROM continuumio/anaconda3

# Dolt release to install; override with `--build-arg DOLT_VERSION=x.y.z`.
ARG DOLT_VERSION=0.40.19

# OS packages: git for the clones below, psmisc for `killall`, gcc/g++ for
# building Python extensions. `apt-get` is the script-stable CLI, and the
# package lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        g++ \
        gcc \
        git \
        psmisc \
        zip \
    && rm -rf /var/lib/apt/lists/*

# Install the Dolt binary. Only the files this step created are removed,
# instead of wiping everything under /tmp.
RUN wget -nv "https://github.com/dolthub/dolt/releases/download/v${DOLT_VERSION}/dolt-linux-amd64.tar.gz" -O /tmp/dolt-linux-amd64.tar.gz \
    && tar -xzf /tmp/dolt-linux-amd64.tar.gz -C /tmp \
    && cp /tmp/dolt-linux-amd64/bin/dolt /usr/bin/ \
    && rm -rf /tmp/dolt-linux-amd64 /tmp/dolt-linux-amd64.tar.gz

# Clone the Dolt database into /investment_data, then overlay the Git
# repository (scripts) into the same directory.
# NOTE: both layers capture remote state at build time and are cached by
# Docker; build with --no-cache to refresh them.
WORKDIR /
RUN dolt clone chenditc/investment_data
WORKDIR /investment_data
RUN git init && git pull https://github.com/chenditc/investment_data.git

# Build and install qlib from source; numpy and cython are installed first,
# in the same order as before. --no-cache-dir keeps pip's download cache out
# of the layer.
WORKDIR /
RUN pip install --no-cache-dir numpy \
    && pip install --no-cache-dir --upgrade cython \
    && git clone https://github.com/microsoft/qlib.git /qlib
WORKDIR /qlib
RUN pip install --no-cache-dir . \
    && pip install --no-cache-dir -r scripts/data_collector/yahoo/requirements.txt

# Copy the dependency manifest on its own so this layer stays cached until
# requirements.txt changes.
COPY ./requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir -r /tmp/requirements.txt

COPY . /app
WORKDIR /investment_data/
18 changes: 11 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,20 @@ Follow https://github.com/dolthub/dolt

## Export to qlib format
```
dolt sql-server -H 0.0.0.0
docker run -v /<some output directory>:/output -it --rm chenditc/investment_data bash -c "bash dump_qlib_bin.sh && cp ./qlib_bin.tar.gz /output/"
```

# Run in this repo's root directory
mkdir ./qlib/qlib_source
python ./qlib/dump_all_to_qlib_source.py
## Daily Update
```
export TUSHARE=<Token>
bash daily_update.sh
```

# Run qlib's yahoo converter: https://github.com/microsoft/qlib/tree/main/scripts/data_collector/yahoo
python3 ~/qlib/scripts/data_collector/yahoo/collector.py normalize_data --source_dir /mnt/investment_data/qlib/qlib_source/ --normalize_dir ./qlib_normalize --max_workers=16 --date_field_name="tradedate"
python3 ~/qlib/scripts/dump_bin.py dump_all --csv_path ./qlib_normalize/ --qlib_dir ./qlib_bin --date_field_name=tradedate --exclude_fields=tradedate,symbol
## Daily update and output
```
docker run -v /<some output directory>:/output -e TUSHARE=<Token> -it --rm chenditc/investment_data bash -c "bash daily_update.sh && bash dump_qlib_bin.sh && cp ./qlib_bin.tar.gz /output/"
```


# Initiative
1. Try to fill in missing data by combining data from multiple data sources, for example, data for delisted companies.
Expand Down
2 changes: 2 additions & 0 deletions daily_update.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Bring the local Dolt database up to date before computing incremental ranges.
dolt pull chenditc/investment_data

echo "Updating index weight"
# Resume from the day after the newest trade_date already stored in
# ts_index_weight, formatted as YYYYMMDD. `-r csv` prints a header row followed
# by the value, so `tail -1` keeps only the value.
startdate=$(dolt sql -q "select DATE_FORMAT(DATE_ADD(max(trade_date), INTERVAL 1 DAY), '%Y%m%d') from ts_index_weight" -r csv | tail -1)
python3 tushare/dump_index_weight.py --start_date=$startdate
Expand Down
19 changes: 19 additions & 0 deletions dump_qlib_bin.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Export the investment_data Dolt database to a qlib binary bundle
# (./qlib_bin.tar.gz). Run with bash from inside the image, where the Dolt
# clone lives in /investment_data/ and the qlib checkout in /qlib/.

# Stop immediately if the data directory is missing; otherwise every later
# step would run against whatever directory the caller happened to be in.
cd /investment_data/ || exit 1
dolt pull chenditc/investment_data

# Block (for at most ~30s) until the background sql-server accepts TCP
# connections, so the dump scripts do not race the server start-up.
# NOTE(review): assumes the server listens on Dolt's default 127.0.0.1:3306 and
# that the dump scripts connect there -- confirm against their connection
# string. On timeout the script continues, matching the previous behaviour.
wait_for_sql_server() {
    local attempt
    for attempt in $(seq 1 30); do
        # The subshell opens and closes a probe connection; /dev/tcp is a bash feature.
        if (exec 3<>/dev/tcp/127.0.0.1/3306) 2>/dev/null; then
            return 0
        fi
        sleep 1
    done
    echo "dolt sql-server not reachable on 127.0.0.1:3306 after 30s; continuing anyway" >&2
}

# Stage 1: dump all tables to per-symbol CSV files readable by qlib's tools.
dolt sql-server &
wait_for_sql_server
# -p keeps re-runs from failing when the directory already exists.
mkdir -p ./qlib/qlib_source
python3 ./qlib/dump_all_to_qlib_source.py
killall dolt

# Stage 2: normalize the CSVs and convert them to qlib's binary layout.
python3 /qlib/scripts/data_collector/yahoo/collector.py normalize_data --source_dir ./qlib/qlib_source/ --normalize_dir ./qlib_normalize --max_workers=16 --date_field_name="tradedate"
python3 /qlib/scripts/dump_bin.py dump_all --csv_path ./qlib_normalize/ --qlib_dir ./qlib_bin --date_field_name=tradedate --exclude_fields=tradedate,symbol

# Stage 3: dump index constituents and add them to the instruments directory.
dolt sql-server &
wait_for_sql_server
mkdir -p ./qlib/qlib_index/
python3 ./qlib/dump_index_weight.py
killall dolt

cp qlib/qlib_index/csi* ./qlib_bin/instruments/

# Package the result for copying out of the container.
tar -czvf ./qlib_bin.tar.gz ./qlib_bin/
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Python packages installed into the image by the Dockerfile
# (`pip install -r /tmp/requirements.txt`).
# NOTE(review): versions are unpinned; pin them once a known-good set is confirmed.
tushare
sqlalchemy
pymysql
fire

0 comments on commit ea48f49

Please sign in to comment.