Skip to content

Commit

Permalink
Merge pull request #5 from soachishti/download-report-feature
Browse files Browse the repository at this point in the history
Download report feature
  • Loading branch information
soachishti authored Oct 18, 2017
2 parents 587762d + 6f4898e commit 08705bf
Show file tree
Hide file tree
Showing 8 changed files with 99 additions and 39 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
userid.txt
submission/report.html
submission/report/*

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,13 @@ m.addFilesByWildcard("submission/a01-*.py")

url = m.send() # Submission Report URL

print ("Report Url: " + url)

# Save report file
m.saveWebPage(url, "submission/report.html")

# Download whole report locally including code diff links
mosspy.download_report(url, "submission/report/", connections=8)
```

## Python Compatibility
Expand Down
7 changes: 6 additions & 1 deletion moss_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@

m.addFilesByWildcard("submission/a01-*.py")

url = m.send()
url = m.send()

print ("Report URL: " + url)

# Save report file
m.saveWebPage(url, "submission/report.html")

mosspy.download_report(url, "submission/report/", connections=8, log_level=10) # logging.DEBUG (20 to disable)
3 changes: 2 additions & 1 deletion mosspy/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .moss import Moss
from .moss import Moss
from .download_report import download_report
77 changes: 77 additions & 0 deletions mosspy/download_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from threading import Thread
import logging
import os
try:
from urllib.request import urlopen
except ImportError:
from urllib2 import urlopen

def process_url(url, urls, base_url, path):
    """Download one report page, localise its result links and save it.

    The page at ``url`` is fetched and parsed. Every ``<a>`` / ``<frame>``
    element whose target contains ``"match"`` is rewritten to point at the
    bare file name, so the saved copy links to its locally saved siblings,
    and the target URL is appended to ``urls`` when it is not already there.

    Args:
        url: Address of the page to download.
        urls: List of known URLs; mutated in place with newly found targets.
        base_url: Prefix put in front of relative links to make them absolute.
        path: Directory the page is written into.
    """
    # Imported lazily for backward compatibility: the rest of the package
    # must keep working when bs4 is not installed.
    from bs4 import BeautifulSoup

    logging.debug("Processing URL: " + url)
    response = urlopen(url)
    try:
        html = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()
    soup = BeautifulSoup(html, 'lxml')

    file_name = os.path.basename(url)
    # No usable file name (empty, or no extension such as "123456789"):
    # this is the report's landing page.
    if not file_name or "." not in file_name:
        file_name = "index.html"

    for tag in soup.find_all(['a', 'frame']):
        link = tag.get('href') if tag.has_attr('href') else tag.get('src')

        # Only result pages are downloaded.
        if not link or "match" not in link:
            continue

        link = link.split('#')[0]  # drop the fragment
        basename = os.path.basename(link)

        if basename == link:  # relative URL: make it absolute
            link = base_url + basename

        # Point the saved page at the local copy of the target.
        if tag.name == "a":
            tag['href'] = basename
        elif tag.name == "frame":
            tag['src'] = basename

        # NOTE: check-then-append is not atomic; when callers run this
        # function in several threads a duplicate may occasionally be added.
        if link not in urls:
            urls.append(link)

    # Serialising the soup (not the raw html) keeps the rewritten links.
    # os.path.join copes with ``path`` given with or without a trailing
    # separator, and ``with`` closes the file even if the write fails.
    with open(os.path.join(path, file_name), 'w') as f:
        f.write(str(soup))

def download_report(url, path, connections = 4, log_level=logging.DEBUG):
    """Download a report and every result page it links to into ``path``.

    Pages are fetched by ``process_url`` in worker threads. Each worker may
    append newly discovered URLs to the shared list, so the scheduler keeps
    running until every listed URL has been handed to a worker AND every
    worker has finished.

    Args:
        url: Address of the report's landing page.
        path: Target directory; created when it does not exist.
        connections: Maximum number of concurrent downloads. Values below 1
            are treated as 1.
        log_level: Level passed to ``logging.basicConfig``.

    Raises:
        Exception: If ``url`` is empty or ``None``.
    """
    logging.basicConfig(level=log_level)

    if not url:
        raise Exception("Empty url supplied")

    if not os.path.exists(path):
        os.makedirs(path)

    # Guarantee a trailing separator so that workers which build file names
    # by plain concatenation still write inside the directory.
    path = os.path.join(path, "")

    base_url = url + "/"
    urls = [url]
    threads = []
    next_index = 0  # index of the first URL not yet handed to a worker
    max_threads = max(1, connections)

    logging.debug("="*80)
    logging.debug("Downloading Moss Report - URL: " + url)
    logging.debug("="*80)

    # Iterating ``urls`` directly would stop as soon as the end of the list
    # is reached, even though running workers may still append to it. Loop
    # instead until nothing is pending and nothing is running.
    while next_index < len(urls) or threads:
        # Start workers for pending URLs, up to the connection limit.
        while next_index < len(urls) and len(threads) < max_threads:
            t = Thread(target=process_url,
                       args=[urls[next_index], urls, base_url, path])
            t.start()
            threads.append(t)
            next_index += 1

        # Wait for the oldest worker; it may have queued further URLs.
        # ``threads`` is never empty here: the outer condition ensures either
        # a running worker or a pending URL that was just started above.
        threads.pop(0).join()
6 changes: 5 additions & 1 deletion mosspy/moss.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import socket
import glob
import logging

try:
from urllib.request import urlopen
Expand Down Expand Up @@ -132,9 +133,12 @@ def send(self):
return response.decode().replace("\n","")

def saveWebPage(self, url, path):
    """Download the page at ``url`` and write its decoded text to ``path``.

    Args:
        url: Address of the page to fetch.
        path: File the decoded response body is written to.

    Raises:
        Exception: If ``url`` is empty or ``None``.
    """
    if not url:
        raise Exception("Empty url supplied")

    response = urlopen(url)
    try:
        content = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()

    # ``with`` closes the file even if decoding or writing raises.
    with open(path, 'w') as f:
        f.write(content.decode())
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@
download_url = 'https://github.com/soachishti/moss.py/releases', # I'll explain this in a second
keywords = ['moss', 'similarity', 'detecting', 'plagiarism'], # arbitrary keywords
classifiers = [],
install_requires=['beautifulsoup4==4.6.0'],
)
36 changes: 0 additions & 36 deletions submission/report.html

This file was deleted.

0 comments on commit 08705bf

Please sign in to comment.