-
Notifications
You must be signed in to change notification settings - Fork 1
/
GitHubDownloader.py
123 lines (89 loc) · 3.09 KB
/
GitHubDownloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
GitHub Folder Downloader
Created by Fransiscus Emmanuel Bunaren
https://bunaren.com
'''
import json
import os
import urllib.parse
import urllib.request

import requests
class Downloader:
    """Download a GitHub repository, or a single folder of it, file by file.

    The file listing is read from the GitHub "git trees" API and every file
    is then fetched from raw.githubusercontent.com.

    Attributes:
        repo_url: URL given to the last successful ``load_repository`` call,
            or ``''`` when nothing has been loaded yet.
        files: ``[path, raw_url]`` pairs, one per blob, in API order.
        location: Maps every non-blob path (directories) to the index in
            ``files`` at which the entries listed after it begin.
    """

    # Seconds to wait for the GitHub API before giving up.
    API_TIMEOUT = 30

    def __init__(self, repository_url='', branch=''):
        """Create a downloader, optionally loading a repository right away.

        Args:
            repository_url: ``https://github.com/<owner>/<repo>`` URL. May
                include a ``/tree/<branch>/...`` suffix. Empty means "do not
                load anything yet".
            branch: Branch to read; see ``load_repository``.
        """
        # Always start from a well-defined empty state so the attributes
        # exist even if loading fails.
        self.repo_url = ''
        self.files = []
        self.location = {}
        if repository_url:
            self.load_repository(repository_url, branch)

    @staticmethod
    def __get_branch_from_url(url, branch=''):
        """Return the branch to use for *url*.

        An explicitly given *branch* always wins. Otherwise the first path
        segment after ``/tree/`` is used, falling back to ``'master'``.
        """
        if branch:
            return branch
        if '/tree/' in url:
            from_url = url.split('/tree/', 1)[1].split('/')[0]
            if from_url:
                return from_url
        return 'master'

    @staticmethod
    def __get_tree_api_url(url, branch):
        """Return the recursive git-trees API URL for a repository page URL."""
        # Anything from "/tree/" onwards addresses a branch or folder in the
        # web UI and is not part of the repository's API path.
        repo_url = url.split('/tree/', 1)[0].rstrip('/')
        if repo_url.endswith('.git'):
            repo_url = repo_url[:-len('.git')]
        api_url = repo_url.replace('https://github.com/',
                                   'https://api.github.com/repos/')
        return '{}/git/trees/{}?recursive=1'.format(api_url, branch)

    @staticmethod
    def __get_raw_url(file_path, url, branch=''):
        """Return the raw.githubusercontent.com URL of one file.

        Args:
            file_path: Path of the file inside the repository.
            url: The blob's API URL (``.../repos/<owner>/<repo>/git/blobs/<sha>``).
            branch: Branch the file is read from.
        """
        repo_root = url.replace('https://api.github.com/repos/',
                                'https://raw.githubusercontent.com/')
        repo_root = repo_root.split('/git/blobs/')[0]
        # Percent-encode the path so names with spaces or other reserved
        # characters still form a valid URL ('/' is kept as-is).
        return '/'.join((repo_root, branch, urllib.parse.quote(file_path)))

    @classmethod
    def __index_tree(cls, tree, branch):
        """Split API tree entries into a file list and a directory index.

        Returns:
            ``(files, location)`` in the format of the attributes of the
            same name.
        """
        files = []
        location = {}
        for entry in tree:
            path = entry['path']
            if entry['type'] == 'blob':
                files.append(
                    [path, cls.__get_raw_url(path, entry['url'], branch)])
            else:
                # Index into ``files`` (not into the raw API list), because
                # that is the list ``download`` slices.
                location[path] = len(files)
        return files, location

    def load_repository(self, url, branch=''):
        """Fetch the file listing of a repository and store it on the instance.

        Args:
            url: ``https://github.com/<owner>/<repo>`` URL, optionally with a
                ``/tree/<branch>/...`` suffix.
            branch: Branch to read. When empty, it is taken from the URL and
                defaults to ``'master'``.

        Raises:
            requests.RequestException: On network failure, timeout, or a
                non-success HTTP status from the API.
            KeyError: If the API response has no ``'tree'`` entry.
        """
        branch = self.__get_branch_from_url(url, branch)
        response = requests.get(self.__get_tree_api_url(url, branch),
                                timeout=self.API_TIMEOUT)
        # Surface API errors (unknown repo/branch, rate limit) as HTTP errors
        # instead of an opaque KeyError further down.
        response.raise_for_status()
        listing = json.loads(response.text)
        self.files, self.location = self.__index_tree(listing['tree'], branch)
        # Remember which repository the listing belongs to.
        self.repo_url = url

    @staticmethod
    def __mkdirs(path):
        """Create *path* (and parents) if it does not exist yet."""
        # An empty path means "current directory": nothing to create.
        if path:
            os.makedirs(path, exist_ok=True)

    @staticmethod
    def __normalize_folder(target_folder):
        """Return *target_folder* as a clean repository-relative path.

        ``'*'`` (and anything that reduces to nothing) becomes ``''``, which
        stands for the repository root. Empty, ``.`` and ``..`` segments are
        dropped, which also removes leading ``./`` and trailing ``/``.
        """
        if target_folder == '*':
            return ''
        segments = [segment for segment in target_folder.split('/')
                    if segment not in ('', '.', '..')]
        return '/'.join(segments)

    def __select_files(self, target_folder, recursive):
        """Return the ``[path, raw_url]`` entries that ``download`` fetches."""
        folder = self.__normalize_folder(target_folder)
        if folder:
            start = self.location[folder]
            prefix = folder + '/'
        else:
            start = 0
            prefix = ''
        selected = []
        for entry in self.files[start:]:
            path = entry[0]
            # Skip files that come after the folder in the listing but do
            # not live inside it.
            if not path.startswith(prefix):
                continue
            # Non-recursive: keep only direct children of the folder.
            if recursive or '/' not in path[len(prefix):]:
                selected.append(entry)
        return selected

    def download(
        self,
        destination,
        target_folder='*',
        recursive=True,
    ):
        """Download the loaded repository, or one folder of it.

        Files keep their repository-relative path below *destination*.

        Args:
            destination: Local directory to download into; created if needed.
            target_folder: Repository folder to download, or ``'*'`` for the
                whole repository. Leading ``./`` and a trailing ``/`` are
                ignored.
            recursive: When false, only files directly inside the folder are
                downloaded; sub-folders are skipped.

        Raises:
            KeyError: If *target_folder* is not a folder of the repository.
        """
        self.__mkdirs(destination)
        for path, raw_url in self.__select_files(target_folder, recursive):
            local_path = os.path.join(destination, path)
            self.__mkdirs(os.path.dirname(local_path))
            urllib.request.urlretrieve(raw_url, local_path)