diff --git a/backend/src/tasks/censys.ts b/backend/src/tasks/censys.ts index bdd1895f6..3817187c8 100644 --- a/backend/src/tasks/censys.ts +++ b/backend/src/tasks/censys.ts @@ -1,33 +1,37 @@ import axios from 'axios'; -import { Domain, Organization } from '../models'; +import { Domain } from '../models'; import { plainToClass } from 'class-transformer'; import * as dns from 'dns'; import saveDomainsToDb from './helpers/saveDomainsToDb'; import { CommandOptions } from './ecs-client'; import getRootDomains from './helpers/getRootDomains'; +// TODO: code only returns first 100 results; add cursor to API call to return all results interface CensysAPIResponse { status: string; - results: { - 'parsed.names'?: string[]; - }[]; - metadata: { - count: number; - page: number; - pages: number; + result: { + total: number; + hits: [ + { + names?: string[]; + } + ]; }; + // links: { + // next: string; + // }; } -const sleep = (milliseconds) => { +const sleep = (milliseconds: number) => { return new Promise((resolve) => setTimeout(resolve, milliseconds)); }; -const fetchCensysData = async (rootDomain: string, page: number) => { +const fetchCensysData = async (rootDomain: string) => { console.log( - `[censys] fetching certificates for query "${rootDomain}", page ${page}` + `[censys] fetching certificates for query "${rootDomain}" from Censys...` ); - const { data, status } = await axios({ - url: 'https://censys.io/api/v1/search/certificates', + const { data } = await axios({ + url: 'https://search.censys.io/api/v2/certificates/search', method: 'POST', auth: { username: String(process.env.CENSYS_API_ID), @@ -37,9 +41,9 @@ const fetchCensysData = async (rootDomain: string, page: number) => { 'Content-Type': 'application/json' }, data: { - query: rootDomain, - page: page, - fields: ['parsed.names'] + q: rootDomain, + per_page: 100, + fields: ['names'] } }); return data as CensysAPIResponse; @@ -51,6 +55,7 @@ export const handler = async (commandOptions: CommandOptions) => { console.log('Running censys on organization', organizationName); const rootDomains = await getRootDomains(organizationId!); + const uniqueNames = new Set(); //used to dedupe domain names const foundDomains = new Set<{ name: string; organization: { id: string }; @@ -59,33 +64,30 @@ export const handler = async (commandOptions: CommandOptions) => { }>(); for (const rootDomain of rootDomains) { - let pages = 1; - for (let page = 1; page <= pages; page++) { - const data = await fetchCensysData(rootDomain, page); - pages = data.metadata.pages; - for (const result of data.results) { - const names = result['parsed.names']; - if (!names) continue; - for (const name of names) { - if (name.endsWith(rootDomain)) { - foundDomains.add({ - name: name.replace('*.', ''), - organization: { id: organizationId! }, - fromRootDomain: rootDomain, - discoveredBy: { id: scanId } - }); - } + const data = await fetchCensysData(rootDomain); + for (const hit of data.result.hits) { + const names = hit['names']; + if (!names) continue; + for (const name of names) { + if (!uniqueNames.has(name) && name.endsWith(rootDomain)) { + uniqueNames.add(name); + foundDomains.add({ + name: name.replace('*.', ''), + organization: { id: organizationId! }, + fromRootDomain: rootDomain, + discoveredBy: { id: scanId } + }); } } - - await sleep(1000); // Wait for rate limit } + + await sleep(1000); // Wait for rate limit } // LATER: Can we just grab the cert the site is presenting, and store that? // Censys (probably doesn't know who's presenting it) // SSLyze (fetches the cert), Project Sonar (has SSL certs, but not sure how pulls domains -- from IPs) - // Project Sonar has forward & reverse DNS for finding subdomains + // Project Sonar has both forward & reverse DNS for finding subdomains // Save domains to database console.log('[censys] saving domains to database...'); @@ -109,6 +111,6 @@ export const handler = async (commandOptions: CommandOptions) => { }) ); } - saveDomainsToDb(domains); + await saveDomainsToDb(domains); console.log(`[censys] done, saved or updated ${domains.length} domains`); };