Skip to content

Commit

Permalink
Merge pull request #10 from UTMediaCAT/#3-validate-date-args
Browse files Browse the repository at this point in the history
allow users to specify a specific range of dates for the tweets (twit…
  • Loading branch information
jacqueline-chan authored Mar 25, 2021
2 parents 41da829 + 9792ccb commit 9817d85
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 3 deletions.
6 changes: 5 additions & 1 deletion commandline/.env
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
COMMANDLINE_PATH_SCOPE_PARSER='../../mediacat-frontend/scope_parser/main.py'
COMMANDLINE_PATH_INPUT_CSV='../../mediacat-frontend/scope_parser/csv/test_demo.csv'

COMMANDLINE_PATH_TWITTER_CRAWLER='../../mediacat-twitter-crawler/twitter_crawler.py'
COMMANDLINE_PATH_TWITTER_CRAWLER='../../mediacat-twitter-crawler/main.py'
COMMANDLINE_PATH_DOMAIN_CRAWLER='../../mediacat-domain-crawler/newCrawler/crawl.js'

COMMANDLINE_FAILED_DOMAIN_LINKS='./failed_links_list.json'
Expand All @@ -12,3 +12,7 @@ COMMANDLINE_domaincsvFile = './domain.csv'
COMMANDLINE_twittercsvFile = './twitter.csv'

COMMANDLINE_metadataJSON = './metadata_modified_list.json'


COMMANDLINE_TWT_START_DATE = '2020-01-01'
COMMANDLINE_TWT_END_DATE = '2020-10-31'
23 changes: 21 additions & 2 deletions commandline/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ require('dotenv').config();
const PATH_SCOPE_PARSER = process.env.COMMANDLINE_PATH_SCOPE_PARSER || '../../mediacat-frontend/scope_parser/main.py';
const PATH_INPUT_CSV = process.env.COMMANDLINE_PATH_INPUT_CSV || '../../mediacat-frontend/scope_parser/csv/test_demo.csv';

const PATH_TWITTER_CRAWLER= process.env.COMMANDLINE_PATH_TWITTER_CRAWLER || '../../mediacat-twitter-crawler/twitter_crawler.py';
const PATH_TWITTER_CRAWLER= process.env.COMMANDLINE_PATH_TWITTER_CRAWLER || '../../mediacat-twitter-crawler/main.py';
const PATH_DOMAIN_CRAWLER= process.env.COMMANDLINE_PATH_DOMAIN_CRAWLER || '../../mediacat-domain-crawler/newCrawler/crawl.js';

const FAILED_DOMAIN_LINKS= process.env.COMMANDLINE_FAILED_DOMAIN_LINKS || './failed_links_list.json';
Expand All @@ -18,11 +18,18 @@ const twittercsvFile = process.env.COMMANDLINE_twittercsvFile || './twitter.csv'

const metadataJSON = process.env.COMMANDLINE_metadataJSON || './metadata_modified_list.json';

// Optional twitter crawler filters read from the environment (.env).
// Each value falls back to null when its variable is unset or empty;
// non-null values are appended as extra command-line arguments when the
// twitter crawler is spawned. Dates are expected in YYYY-MM-DD form.
const start_date = process.env.COMMANDLINE_TWT_START_DATE || null;
const end_date = process.env.COMMANDLINE_TWT_END_DATE || null;
const keyword = process.env.COMMANDLINE_TWT_KEYWORD || null;

/**
* checks for the correct number of arguments and calls the appropriate function
* `node app.js twitter ` to run the twitter crawler
* set COMMANDLINE_TWT_START_DATE and COMMANDLINE_TWT_END_DATE in .env to run the twitter crawler with a date range
* additionally set COMMANDLINE_TWT_KEYWORD in .env to run the twitter crawler with a date range and a keyword
* `node app.js domain ` to run the domain crawler
* `node app.js` to run everything
* dates in YYYY-MM-DD form
*/

function run() {
Expand Down Expand Up @@ -145,10 +152,22 @@ function stepTwoTwitter() {
*/

try {
const pythonProcess2 = childProcess.spawn( `python3 ${PATH_TWITTER_CRAWLER} ${twittercsvFile}`, {

let pythonProcess2 = childProcess.spawn( `python3 ${PATH_TWITTER_CRAWLER} ${twittercsvFile}`, {
shell: true
});

if (start_date !== null && end_date != null && keyword != null ) {
pythonProcess2 = childProcess.spawn( `python3 ${PATH_TWITTER_CRAWLER} ${twittercsvFile} ${start_date} ${end_date} ${keyword}`, {
shell: true
});

} else if (start_date !== null && end_date != null ) {
pythonProcess2 = childProcess.spawn( `python3 ${PATH_TWITTER_CRAWLER} ${twittercsvFile} ${start_date} ${end_date}`, {
shell: true
});
}

pythonProcess2.on('close', () => {
callbackAfterTwitterCrawler();
});
Expand Down

0 comments on commit 9817d85

Please sign in to comment.