Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add num repos per company & number comp commits per repositories reports #93

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion osci/actions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
from .filter import FilterUnlicensedAction, FilterListCompanyProjectsAction
from .load import LoadCompanyCommitsAction
from .notify import GenerateEmailAction, ContributorsRankingMbmReportAction
from .postprocess import FindContributorsRepositoriesChangeAction, OSCIChangeReportAction
from .postprocess import (
FindContributorsRepositoriesChangeAction,
OSCIChangeReportAction,
GetNumberOfRepositoriesPerCompanyAction,
)
from .preprocess import (
MatchCompanyAction,
LoadRepositoriesAction,
Expand Down
2 changes: 2 additions & 0 deletions osci/actions/postprocess/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
from .find_new_repos_and_commiters import FindContributorsRepositoriesChangeAction
from .osci_change_report import OSCIChangeReportAction
from .get_num_repos_for_companies import GetNumberOfRepositoriesPerCompanyAction
from .get_num_companies_commit_per_repo import GetNumberOfCompaniesCommitsPerRepositoryAction
33 changes: 33 additions & 0 deletions osci/actions/postprocess/get_num_companies_commit_per_repo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Copyright since 2021, EPAM Systems

This file is part of OSCI.

OSCI is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

OSCI is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with OSCI. If not, see <http://www.gnu.org/licenses/>."""
from osci.actions import Action
from datetime import datetime
from osci.postprocess.get_num_companies_commit_per_repo import get_num_companies_commit_per_repository
from osci.datalake.reports.general.companies_commits_per_repo import CompaniesCommitsPerRepositoriesDTD


class GetNumberOfCompaniesCommitsPerRepositoryAction(Action):
    """Count number companies commits per repositories"""

    @classmethod
    def name(cls) -> str:
        """Return the identifier string of this action."""
        action_name = 'get-number-of-companies-commits-per-repository'
        return action_name

    def _execute(self, day: datetime):
        """Build the commits-per-repository report for `day`, save it and return it.

        :param day: date the report is generated for
        :return: the saved `CompaniesCommitsPerRepositoriesDTD` report object
        """
        dtd_report = CompaniesCommitsPerRepositoriesDTD(date=day)
        commits_df = get_num_companies_commit_per_repository(date=day)
        dtd_report.save(commits_df)
        return dtd_report
33 changes: 33 additions & 0 deletions osci/actions/postprocess/get_num_repos_for_companies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Copyright since 2021, EPAM Systems

This file is part of OSCI.

OSCI is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

OSCI is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with OSCI. If not, see <http://www.gnu.org/licenses/>."""
from osci.actions import Action
from datetime import datetime
from osci.postprocess.get_num_repos_per_company import get_num_repos_per_company
from osci.datalake.reports.general.repos_per_company import RepositoriesPerCompanyDTD


class GetNumberOfRepositoriesPerCompanyAction(Action):
    """Count number of repositories per company"""

    @classmethod
    def name(cls) -> str:
        """Return the identifier string of this action."""
        action_name = 'get-number-of-repositories-per-company'
        return action_name

    def _execute(self, day: datetime):
        """Build the repositories-per-company report for `day`, save it and return it.

        :param day: date the report is generated for
        :return: the saved `RepositoriesPerCompanyDTD` report object
        """
        dtd_report = RepositoriesPerCompanyDTD(date=day)
        repos_df = get_num_repos_per_company(date=day)
        dtd_report.save(repos_df)
        return dtd_report
2 changes: 0 additions & 2 deletions osci/config/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ class WebConfig(NamedTuple):


def parse_web_config(web_cfg) -> WebConfig:
log.debug(web_cfg)
fs = web_cfg['fs']
attrs_map = {
FileSystemType.local: dict(base_path=web_cfg['base_path'],
Expand Down Expand Up @@ -161,7 +160,6 @@ def __init__(self, env: str = None, dbutils=None):
if BaseYmlConfigReader(env='local').exists()
else None) or 'default'
self.__cfg = BaseYmlConfigReader(self.env, dbutils=dbutils).config
log.info(f"Full config: {self.__cfg}")
log.info(f'Configuration loaded for env: {self.env}')

file_system_type_map: Mapping[str, type(FileSystemConfig)] = {
Expand Down
3 changes: 0 additions & 3 deletions osci/config/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ def _load_variables_from_env(variable):
}
if isinstance(variable, list):
return [_load_variables_from_env(v) for v in variable]
print('key', variable, 'value', os.environ.get(str(variable)))
return os.environ.get(str(variable))

return {k: _load_variables_from_env(v) for k, v in config.items() if k != META_CONFIG_FIELD}
Expand Down Expand Up @@ -140,7 +139,6 @@ def read(self) -> Dict[str, Any]:
log.debug(f'Read config from {self.file_path}')
with open(self.file_path) as config_file:
self.__cfg = yaml.load(config_file, Loader=yaml.FullLoader)
log.debug(f"Prod yml load: {self.__cfg}")
meta = self.__cfg[META_CONFIG_FIELD].copy()
if meta[CONFIG_SOURCE_TYPE_FIELD] in readers_types_map:
self.__cfg = readers_types_map[meta[CONFIG_SOURCE_TYPE_FIELD]](self.__cfg, self.dbutils)
Expand All @@ -151,7 +149,6 @@ def read(self) -> Dict[str, Any]:
file_format=self.file_format).config,
self.__cfg
)
log.debug(f"Prod yml res: {self.__cfg}")
return self.__cfg
except FileNotFoundError as ex:
log.error(ex)
Expand Down
3 changes: 1 addition & 2 deletions osci/datalake/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,12 +265,11 @@ def get_companies_contributors_repository_commits_spark_path(self, date) -> str:
def save_companies_contributors_repository_commits(self, df: pd.DataFrame, date: datetime):
    """Save the companies-contributors-repository commits DataFrame for `date`.

    This version has no implementation and always raises
    `NotImplementedError` (presumably concrete areas override it --
    verify against the subclasses).

    :param df: report data to store
    :param date: date the report belongs to
    :raises NotImplementedError: always
    """
    raise NotImplementedError()

def get_companies_contributors_repository_commits(self, date: datetime) -> pd.DataFrame:
def get_companies_contributors_repository_commits(self, name: str, date: datetime) -> pd.DataFrame:
raise NotImplementedError()


class BaseWebArea(abc.ABC):

_osci_ranking_dir_name = 'osci-ranking'
_osci_ranking_monthly_dir_name = 'monthly'

Expand Down
6 changes: 6 additions & 0 deletions osci/datalake/blob/public.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,9 @@ def _get_email_path(self, date: datetime) -> str:

def save_email(self, email_body: str, date: datetime):
    """Write the e-mail body as an HTML file at the e-mail path for `date`.

    :param email_body: content of the e-mail to store
    :param date: date used to resolve the target path
    """
    email_path = self._get_email_path(date=date)
    self.write_string_to_file(
        path=email_path,
        data=email_body,
        content_type='text/html',
    )

def get_companies_contributors_repository_commits(self, name: str, date: datetime) -> pd.DataFrame:
    """Read the report `name` for `date` from its CSV file.

    The file is looked up at
    `<report base path>/<name>/<name>_<YYYY-MM-DD>.csv`.

    :param name: report name, used as directory and file-name prefix
    :param date: date of the report to read
    :return: the report content as a pandas DataFrame
    """
    # Only the date goes through strftime: interpolating the base path or the
    # report name into the format string would make any '%' they contain be
    # interpreted as a date directive and silently corrupt the path.
    file_name = f'{name}_{date.strftime("%Y-%m-%d")}.csv'
    full_path = f'{self._report_base_path}/{name}/{file_name}'
    return self.read_pandas_dataframe_from_csv(path=full_path)
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
You should have received a copy of the GNU General Public License
along with OSCI. If not, see <http://www.gnu.org/licenses/>."""
from osci.datalake import DataLake

import datetime
import pandas as pd

Expand All @@ -26,6 +25,11 @@ class CompaniesContributorsRepository:
def __init__(self, date: datetime.datetime):
    """Store the date this report instance refers to.

    :param date: date of the report
    """
    self.date = date

@property
def name(self) -> str:
    """Return the fixed name string of this report."""
    report_name = 'Company-contributors-repository-commits_YTD'
    return report_name

@property
def path(self) -> str:
"""
Expand All @@ -51,4 +55,4 @@ def read(self) -> pd.DataFrame:
"""
Read company contributors repository commits to pandas DataFrame from file
"""
return DataLake().public.get_companies_contributors_repository_commits(self.date)
return DataLake().public.get_companies_contributors_repository_commits(name=self.name, date=self.date)
1 change: 0 additions & 1 deletion osci/datalake/local/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ def __init__(self, base_path=BASE_PATH, base_area_dir=BASE_AREA_DIR):
super().__init__()
self.BASE_PATH = Path(base_path)
self.BASE_AREA_DIR = base_area_dir
print(self, base_path, base_area_dir)

def add_fs_prefix(self, path: Union[Path, str]) -> str:
    """Prepend this area's `FS_PREFIX` scheme to `path`.

    :param path: path to prefix
    :return: string of the form `<FS_PREFIX>:///<path>`
    """
    scheme = self.FS_PREFIX
    return f'{scheme}:///{path}'
Expand Down
7 changes: 7 additions & 0 deletions osci/datalake/local/public.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,10 @@ def _get_email_path(self, date: datetime) -> Path:
def save_email(self, email_body: str, date: datetime):
    """Write the e-mail body to the e-mail file for `date`, encoded as UTF-8.

    :param email_body: content of the e-mail to store
    :param date: date used to resolve the target file
    """
    target = str(self._get_email_path(date=date))
    with open(target, mode='w', encoding='utf-8') as email_file:
        email_file.write(email_body)

def get_companies_contributors_repository_commits(self, name: str, date: datetime) -> pd.DataFrame:
    """Read the report `name` for `date` from its local CSV file.

    The file is looked up at
    `<report base path>/<name>/<name>_<YYYY-MM-DD>.csv`.

    :param name: report name, used as directory and file-name prefix
    :param date: date of the report to read
    :return: the report content as a pandas DataFrame
    :raises FileNotFoundError: if the report file does not exist
    """
    # Reading must not touch the file system: the report directory is
    # deliberately NOT created here, so a lookup of a missing report fails
    # cleanly instead of leaving an empty directory behind.
    filename = f'{name}_{date.strftime("%Y-%m-%d")}.csv'
    full_path = self._report_base_path / name / filename
    return pd.read_csv(full_path)
32 changes: 32 additions & 0 deletions osci/datalake/reports/general/companies_commits_per_repo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Copyright since 2021, EPAM Systems

This file is part of OSCI.

OSCI is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

OSCI is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with OSCI. If not, see <http://www.gnu.org/licenses/>."""
from typing import Type

from osci.datalake.schemas.public import NumberOfCompaniesCommitsInRepositories
from osci.datalake import DatePeriodType

from .base import Report, GeneralReportFactory


class CompaniesCommitsPerRepositoriesFactory(GeneralReportFactory):
    """Factory holding the base report class for the commits-per-repositories report."""

    # Base class built dynamically from `Report`; carries the report's
    # base name and its schema as class attributes.
    report_base_cls: Type[Report] = type(
        '_Report',
        (Report,),
        {
            'base_name': 'OSCI_Commits_per_repositories',
            'schema': NumberOfCompaniesCommitsInRepositories,
        },
    )


class CompaniesCommitsPerRepositoriesDTD(CompaniesCommitsPerRepositoriesFactory.report_base_cls):
    """Commits-per-repositories report with the `DatePeriodType.DTD` date period."""
    date_period = DatePeriodType.DTD
32 changes: 32 additions & 0 deletions osci/datalake/reports/general/repos_per_company.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Copyright since 2021, EPAM Systems

This file is part of OSCI.

OSCI is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

OSCI is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with OSCI. If not, see <http://www.gnu.org/licenses/>."""
from typing import Type

from osci.datalake.schemas.public import NumberRepositoryPerCompaniesSchema
from osci.datalake import DatePeriodType

from .base import Report, GeneralReportFactory


class RepositoriesPerCompanyFactory(GeneralReportFactory):
    """Factory holding the base report class for the repositories-per-company report."""

    # Base class built dynamically from `Report`; carries the report's
    # base name and its schema as class attributes.
    report_base_cls: Type[Report] = type(
        '_Report',
        (Report,),
        {
            'base_name': 'Repositories-per-company',
            'schema': NumberRepositoryPerCompaniesSchema,
        },
    )


class RepositoriesPerCompanyDTD(RepositoriesPerCompanyFactory.report_base_cls):
    """Repositories-per-company report with the `DatePeriodType.DTD` date period."""
    date_period = DatePeriodType.DTD
12 changes: 12 additions & 0 deletions osci/datalake/schemas/public.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,17 @@ class CompaniesContributorsRepositoryCommits:
date = 'date'


class NumberRepositoryPerCompaniesSchema:
    """Column names of the repositories-per-company report."""
    # Column holding the company name.
    company = 'Company'
    # Column holding the per-company repository figure.
    repository = 'Repositories'


class NumberOfCompaniesCommitsInRepositories:
    """Column names of the companies-commits-per-repository report."""
    # Column holding the company name.
    company = 'Company'
    # Column holding the repository name.
    repository = 'Repository'
    # Column holding the commits figure for the (company, repository) pair.
    commits = 'Commits'


class OSCIContributorsRankingSchema:
company = 'Company'
author = 'Contributor'
Expand All @@ -165,3 +176,4 @@ class PublicSchemas:
new_repos = NewReposSchema
company_contributors_repository_commits = CompaniesContributorsRepositoryCommits
osci_contributors_ranking = OSCIContributorsRankingSchema
num_rep_per_company = NumberRepositoryPerCompaniesSchema
16 changes: 16 additions & 0 deletions osci/postprocess/find_new_repos_and_commiters.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
"""Copyright since 2021, EPAM Systems

This file is part of OSCI.

OSCI is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

OSCI is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with OSCI. If not, see <http://www.gnu.org/licenses/>."""
import datetime
import pandas as pd

Expand Down
34 changes: 34 additions & 0 deletions osci/postprocess/get_num_companies_commit_per_repo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""Copyright since 2021, EPAM Systems

This file is part of OSCI.

OSCI is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

OSCI is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with OSCI. If not, see <http://www.gnu.org/licenses/>."""
import pandas as pd
from datetime import datetime
from osci.datalake import CompaniesContributorsRepository
from osci.datalake.schemas.public import NumberOfCompaniesCommitsInRepositories


def get_num_companies_commit_per_repository(date: datetime) -> pd.DataFrame:
    """Sum the commits of every company in every repository.

    Reads the company-contributors-repository commits report for `date`,
    groups it by (company, repository) and sums the commits column.

    :param date: date of the source report to read
    :return: DataFrame with one row per (company, repository) pair, using the
        column names of `NumberOfCompaniesCommitsInRepositories`
    """
    target = NumberOfCompaniesCommitsInRepositories
    source = CompaniesContributorsRepository.schema
    commits_df = CompaniesContributorsRepository(date=date).read()

    group_keys = [source.company, source.repository]
    selected = commits_df[group_keys + [source.commits]]
    totals = selected.groupby(group_keys).sum().reset_index()

    # Translate the source column names into the report's own column names.
    column_names = {
        source.company: target.company,
        source.repository: target.repository,
        source.commits: target.commits,
    }
    return totals.rename(columns=column_names)
33 changes: 33 additions & 0 deletions osci/postprocess/get_num_repos_per_company.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Copyright since 2021, EPAM Systems

This file is part of OSCI.

OSCI is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

OSCI is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with OSCI. If not, see <http://www.gnu.org/licenses/>."""
import pandas as pd
from datetime import datetime
from osci.datalake import CompaniesContributorsRepository
from osci.datalake.schemas.public import NumberRepositoryPerCompaniesSchema


def get_num_repos_per_company(date: datetime) -> pd.DataFrame:
    """Get the number of distinct repositories per company.

    Reads the company-contributors-repository commits report for `date` and
    counts, for every company, how many different repositories appear in it.

    :param date: date of the source report to read
    :return: DataFrame with one row per company, using the column names of
        `NumberRepositoryPerCompaniesSchema` (company, number of repositories)
    """
    report_schema = NumberRepositoryPerCompaniesSchema
    schema = CompaniesContributorsRepository.schema
    rep_per_comp_df = CompaniesContributorsRepository(date=date).read()
    # The source report may hold several rows for the same
    # (company, repository) pair, so a plain row count would count a
    # repository more than once; `nunique` counts each repository once
    # per company (missing values are ignored, as `count` did).
    return rep_per_comp_df \
        .groupby(schema.company)[schema.repository] \
        .nunique() \
        .reset_index() \
        .rename(columns={schema.repository: report_schema.repository,
                         schema.company: report_schema.company})
6 changes: 6 additions & 0 deletions osci/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,9 @@ def get_compared_date(day: datetime):
if day.month == 1:
return datetime(year=day.year, month=day.month, day=1)
return datetime(year=day.year, month=day.month, day=1) - timedelta(days=1)


def days_range(start: datetime, end: datetime, delta=timedelta(days=1)):
    """Yield datetimes from `start` (inclusive) to `end` (exclusive) in steps of `delta`.

    :param start: first value to yield
    :param end: upper bound; never yielded
    :param delta: step between consecutive values, one day by default
    :return: generator of datetimes; empty when `start >= end`
    :raises ValueError: on first iteration, if `start < end` and `delta` is
        zero or negative (the range could never reach `end`)
    """
    # A non-positive step can never move `start` past `end`; fail loudly
    # instead of looping forever. Ranges that are already empty are left
    # alone so every previously terminating call behaves as before.
    if start < end and delta <= timedelta(0):
        raise ValueError(f'delta must be a positive timedelta, got {delta!r}')
    current = start
    while current < end:
        yield current
        current += delta
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pypandoc==1.5
pyspark == 3.0.1
pytest==6.0.1
pytest-mock==3.5.1
-r __app__/requirements.txt
-r osci/requirements.txt
Loading