diff --git a/.github/workflows/report.yaml b/.github/workflows/report.yaml new file mode 100644 index 0000000..555930b --- /dev/null +++ b/.github/workflows/report.yaml @@ -0,0 +1,29 @@ +name: Run report + +on: + schedule: + - cron: '0 0 1 * *' + workflow_dispatch: + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Run report + run: python google-analytics/report_github_actions.py + env: + SERVICE_ACCOUNT_CREDENTIALS: ${{ secrets.SERVICE_ACCOUNT_CREDENTIALS }} + PROPERTY_ID: ${{ secrets.PROPERTY_ID }} + TOKEN_PICKLE: ${{ secrets.TOKEN_PICKLE }} + OAUTH_CREDENTIALS: ${{ secrets.OAUTH_CREDENTIALS }} \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..edff4df --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +# .gitignore +service_account.json +google-analytics/service_account.json +google-analytics/token.pickle +google-analytics/oauth_credentials.json +google-analytics/report.py +google-analytics/minify.py +data/website_info.json +data/website_dimensions_info.json +data/pages_info.json diff --git a/data/pages_info.json b/data/pages_info.json new file mode 100644 index 0000000..1179216 --- /dev/null +++ b/data/pages_info.json @@ -0,0 +1,629 @@ +[ + { + "pagePath": "/trilhas/github-starter/", + "year": "2023", + "month": "11", + "activeUsers": "426", + "screenPageViews": "544", + "screenPageViewsPerSession": "1.0018416206261511", + "screenPageViewPerUser": "1.2769953051643192", + "averageSessionDuration": "134.38918510681395", + "bounceRate": "0.37569060773480661" + }, + { + "pagePath": "/trilhas/github-starter/", + "year": "2023", + "month": "12", + "activeUsers": "350", + "screenPageViews": "418", + "screenPageViewsPerSession": "0.97435897435897434", + "screenPageViewPerUser": 
"1.1942857142857144", + "averageSessionDuration": "132.42656525641024", + "bounceRate": "0.39160839160839161" + }, + { + "pagePath": "/trilhas/python/", + "year": "2023", + "month": "12", + "activeUsers": "216", + "screenPageViews": "274", + "screenPageViewsPerSession": "1.1138211382113821", + "screenPageViewPerUser": "1.2685185185185186", + "averageSessionDuration": "147.6575574512195", + "bounceRate": "0.093495934959349589" + }, + { + "pagePath": "/trilhas/python/", + "year": "2023", + "month": "11", + "activeUsers": "193", + "screenPageViews": "268", + "screenPageViewsPerSession": "1.1120331950207469", + "screenPageViewPerUser": "1.38860103626943", + "averageSessionDuration": "147.92074998340249", + "bounceRate": "0.14522821576763487" + }, + { + "pagePath": "/trilhas/javascript/", + "year": "2023", + "month": "11", + "activeUsers": "141", + "screenPageViews": "169", + "screenPageViewsPerSession": "1.0764331210191083", + "screenPageViewPerUser": "1.198581560283688", + "averageSessionDuration": "132.14247954140129", + "bounceRate": "0.10191082802547771" + }, + { + "pagePath": "/trilhas/javascript/", + "year": "2023", + "month": "12", + "activeUsers": "115", + "screenPageViews": "134", + "screenPageViewsPerSession": "1.072", + "screenPageViewPerUser": "1.1652173913043478", + "averageSessionDuration": "120.02005012000001", + "bounceRate": "0.096" + }, + { + "pagePath": "/quero/mentoria/", + "year": "2023", + "month": "12", + "activeUsers": "36", + "screenPageViews": "43", + "screenPageViewsPerSession": "1.1025641025641026", + "screenPageViewPerUser": "1.1944444444444444", + "averageSessionDuration": "53.386160410256416", + "bounceRate": "0.10256410256410256" + }, + { + "pagePath": "/quero/mentoria/", + "year": "2023", + "month": "11", + "activeUsers": "32", + "screenPageViews": "44", + "screenPageViewsPerSession": "1.1891891891891893", + "screenPageViewPerUser": "1.375", + "averageSessionDuration": "18.603813918918917", + "bounceRate": "0.054054054054054057" + }, + 
{ + "pagePath": "/quero/estudar/", + "year": "2023", + "month": "11", + "activeUsers": "30", + "screenPageViews": "33", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1.1", + "averageSessionDuration": "27.505619939393945", + "bounceRate": "0.060606060606060608" + }, + { + "pagePath": "/trilhas/python/", + "year": "2024", + "month": "01", + "activeUsers": "30", + "screenPageViews": "36", + "screenPageViewsPerSession": "0.94736842105263153", + "screenPageViewPerUser": "1.2", + "averageSessionDuration": "43.338245921052632", + "bounceRate": "0.15789473684210525" + }, + { + "pagePath": "/team/", + "year": "2023", + "month": "11", + "activeUsers": "28", + "screenPageViews": "37", + "screenPageViewsPerSession": "1.1935483870967742", + "screenPageViewPerUser": "1.3214285714285714", + "averageSessionDuration": "63.027731935483871", + "bounceRate": "0" + }, + { + "pagePath": "/bio/", + "year": "2023", + "month": "11", + "activeUsers": "23", + "screenPageViews": "34", + "screenPageViewsPerSession": "1.2592592592592593", + "screenPageViewPerUser": "1.4782608695652173", + "averageSessionDuration": "133.99290433333334", + "bounceRate": "0.33333333333333331" + }, + { + "pagePath": "/trilhas/github-starter/", + "year": "2024", + "month": "01", + "activeUsers": "23", + "screenPageViews": "24", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1.0434782608695652", + "averageSessionDuration": "46.687562541666665", + "bounceRate": "0.25" + }, + { + "pagePath": "/team/", + "year": "2023", + "month": "12", + "activeUsers": "19", + "screenPageViews": "27", + "screenPageViewsPerSession": "1.4210526315789473", + "screenPageViewPerUser": "1.4210526315789473", + "averageSessionDuration": "12.166380842105264", + "bounceRate": "0" + }, + { + "pagePath": "/trilhas/css3/", + "year": "2023", + "month": "12", + "activeUsers": "17", + "screenPageViews": "21", + "screenPageViewsPerSession": "1.2352941176470589", + "screenPageViewPerUser": "1.2352941176470589", + 
"averageSessionDuration": "64.572313764705882", + "bounceRate": "0.058823529411764705" + }, + { + "pagePath": "/trilhas/javascript/", + "year": "2024", + "month": "01", + "activeUsers": "17", + "screenPageViews": "29", + "screenPageViewsPerSession": "1.2083333333333333", + "screenPageViewPerUser": "1.7058823529411764", + "averageSessionDuration": "251.99919654166663", + "bounceRate": "0.20833333333333334" + }, + { + "pagePath": "/quero/estudar/", + "year": "2023", + "month": "12", + "activeUsers": "16", + "screenPageViews": "20", + "screenPageViewsPerSession": "1.25", + "screenPageViewPerUser": "1.25", + "averageSessionDuration": "84.179525125", + "bounceRate": "0" + }, + { + "pagePath": "/trilhas/css3/", + "year": "2023", + "month": "11", + "activeUsers": "14", + "screenPageViews": "16", + "screenPageViewsPerSession": "1.0666666666666667", + "screenPageViewPerUser": "1.1428571428571428", + "averageSessionDuration": "30.461605066666667", + "bounceRate": "0.066666666666666666" + }, + { + "pagePath": "/trilhas/html5/", + "year": "2023", + "month": "11", + "activeUsers": "12", + "screenPageViews": "15", + "screenPageViewsPerSession": "1.0714285714285714", + "screenPageViewPerUser": "1.25", + "averageSessionDuration": "12.495450428571429", + "bounceRate": "0.14285714285714285" + }, + { + "pagePath": "/blog/", + "year": "2023", + "month": "12", + "activeUsers": "9", + "screenPageViews": "9", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "7.0331621111111104", + "bounceRate": "0" + }, + { + "pagePath": "/contact/", + "year": "2023", + "month": "11", + "activeUsers": "8", + "screenPageViews": "11", + "screenPageViewsPerSession": "1.375", + "screenPageViewPerUser": "1.375", + "averageSessionDuration": "21.677941375", + "bounceRate": "0" + }, + { + "pagePath": "/quero/apoiar/", + "year": "2023", + "month": "12", + "activeUsers": "7", + "screenPageViews": "8", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": 
"1.1428571428571428", + "averageSessionDuration": "15.606349999999999", + "bounceRate": "0.25" + }, + { + "pagePath": "/trilhas/html5/", + "year": "2023", + "month": "12", + "activeUsers": "7", + "screenPageViews": "8", + "screenPageViewsPerSession": "1.1428571428571428", + "screenPageViewPerUser": "1.1428571428571428", + "averageSessionDuration": "49.355028571428569", + "bounceRate": "0" + }, + { + "pagePath": "/bio/", + "year": "2023", + "month": "12", + "activeUsers": "5", + "screenPageViews": "8", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1.6", + "averageSessionDuration": "49.98529725", + "bounceRate": "0.75" + }, + { + "pagePath": "/projetos/", + "year": "2023", + "month": "11", + "activeUsers": "5", + "screenPageViews": "10", + "screenPageViewsPerSession": "1.6666666666666667", + "screenPageViewPerUser": "2", + "averageSessionDuration": "71.021304666666666", + "bounceRate": "0" + }, + { + "pagePath": "/blog/", + "year": "2023", + "month": "11", + "activeUsers": "4", + "screenPageViews": "5", + "screenPageViewsPerSession": "1.25", + "screenPageViewPerUser": "1.25", + "averageSessionDuration": "164.79309449999997", + "bounceRate": "0" + }, + { + "pagePath": "/conduta/", + "year": "2023", + "month": "11", + "activeUsers": "4", + "screenPageViews": "5", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1.25", + "averageSessionDuration": "94.0329384", + "bounceRate": "0.2" + }, + { + "pagePath": "/contact/", + "year": "2023", + "month": "12", + "activeUsers": "3", + "screenPageViews": "3", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "18.679011666666664", + "bounceRate": "0" + }, + { + "pagePath": "/pais_responsaveis/", + "year": "2023", + "month": "11", + "activeUsers": "3", + "screenPageViews": "3", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "12.088882666666668", + "bounceRate": "0" + }, + { + "pagePath": "/projetos/", + 
"year": "2023", + "month": "12", + "activeUsers": "3", + "screenPageViews": "3", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "9.228279", + "bounceRate": "0" + }, + { + "pagePath": "/quero/apoiar/", + "year": "2023", + "month": "11", + "activeUsers": "3", + "screenPageViews": "3", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "5.4880383333333329", + "bounceRate": "0.33333333333333331" + }, + { + "pagePath": "/quero/mentoria/", + "year": "2024", + "month": "01", + "activeUsers": "3", + "screenPageViews": "6", + "screenPageViewsPerSession": "2", + "screenPageViewPerUser": "2", + "averageSessionDuration": "21.643585666666667", + "bounceRate": "0" + }, + { + "pagePath": "/team/", + "year": "2024", + "month": "01", + "activeUsers": "3", + "screenPageViews": "5", + "screenPageViewsPerSession": "1.6666666666666667", + "screenPageViewPerUser": "1.6666666666666667", + "averageSessionDuration": "22.470101333333332", + "bounceRate": "0" + }, + { + "pagePath": "/timeline/", + "year": "2023", + "month": "11", + "activeUsers": "3", + "screenPageViews": "4", + "screenPageViewsPerSession": "1.3333333333333333", + "screenPageViewPerUser": "1.3333333333333333", + "averageSessionDuration": "14.277319666666665", + "bounceRate": "0" + }, + { + "pagePath": "/trilhas/produtos-digitais/", + "year": "2023", + "month": "12", + "activeUsers": "3", + "screenPageViews": "3", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "296.36418999999995", + "bounceRate": "0" + }, + { + "pagePath": "/ong/", + "year": "2023", + "month": "11", + "activeUsers": "2", + "screenPageViews": "4", + "screenPageViewsPerSession": "2", + "screenPageViewPerUser": "2", + "averageSessionDuration": "2.0641945", + "bounceRate": "0" + }, + { + "pagePath": "/ong/", + "year": "2023", + "month": "12", + "activeUsers": "2", + "screenPageViews": "2", + "screenPageViewsPerSession": 
"1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "4.509659", + "bounceRate": "0" + }, + { + "pagePath": "/projetos/", + "year": "2024", + "month": "01", + "activeUsers": "2", + "screenPageViews": "3", + "screenPageViewsPerSession": "1.5", + "screenPageViewPerUser": "1.5", + "averageSessionDuration": "14.401451", + "bounceRate": "0" + }, + { + "pagePath": "/quero/estudar/", + "year": "2024", + "month": "01", + "activeUsers": "2", + "screenPageViews": "2", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "2.2691995", + "bounceRate": "0" + }, + { + "pagePath": "/timeline/", + "year": "2023", + "month": "12", + "activeUsers": "2", + "screenPageViews": "2", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "111.770652", + "bounceRate": "0" + }, + { + "pagePath": "/trilhas/css3/", + "year": "2024", + "month": "01", + "activeUsers": "2", + "screenPageViews": "2", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "39.4223835", + "bounceRate": "0" + }, + { + "pagePath": "/trilhas/produtos-digitais/", + "year": "2024", + "month": "01", + "activeUsers": "2", + "screenPageViews": "2", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "3.550618", + "bounceRate": "0" + }, + { + "pagePath": "/bio/?fbclid=PAAaYbKodlg6scntncoisyqwhryBHELfz2PGBOOmURQd_OQDIRdPHdReLQiFo", + "year": "2023", + "month": "11", + "activeUsers": "1", + "screenPageViews": "1", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "12.895585", + "bounceRate": "0" + }, + { + "pagePath": "/blog/", + "year": "2024", + "month": "01", + "activeUsers": "1", + "screenPageViews": "1", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "1.833407", + "bounceRate": "0" + }, + { + "pagePath": "/blog/2022/10/14/hello-world/", + "year": 
"2023", + "month": "11", + "activeUsers": "1", + "screenPageViews": "1", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "16.346999", + "bounceRate": "0" + }, + { + "pagePath": "/blog/2022/11/18/codaqui-no-benfeitoriacom/", + "year": "2023", + "month": "12", + "activeUsers": "1", + "screenPageViews": "1", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "4.267819", + "bounceRate": "0" + }, + { + "pagePath": "/blog/2023/01/05/codaqui-na-confer\u00eancia-do-devpr-2023/", + "year": "2023", + "month": "11", + "activeUsers": "1", + "screenPageViews": "2", + "screenPageViewsPerSession": "2", + "screenPageViewPerUser": "2", + "averageSessionDuration": "493.722616", + "bounceRate": "0" + }, + { + "pagePath": "/blog/2023/03/11/sucesso-na-inova\u00e7\u00e3o-do-devpr-2023/", + "year": "2023", + "month": "12", + "activeUsers": "1", + "screenPageViews": "1", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "9.84959", + "bounceRate": "0" + }, + { + "pagePath": "/conduta/", + "year": "2023", + "month": "12", + "activeUsers": "1", + "screenPageViews": "1", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "974.121023", + "bounceRate": "0" + }, + { + "pagePath": "/conduta/", + "year": "2024", + "month": "01", + "activeUsers": "1", + "screenPageViews": "1", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "3.710431", + "bounceRate": "0" + }, + { + "pagePath": "/ong/", + "year": "2024", + "month": "01", + "activeUsers": "1", + "screenPageViews": "1", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "50.149765", + "bounceRate": "0" + }, + { + "pagePath": "/pais_responsaveis/", + "year": "2023", + "month": "12", + "activeUsers": "1", + "screenPageViews": "1", + "screenPageViewsPerSession": "1", + 
"screenPageViewPerUser": "1", + "averageSessionDuration": "1.086987", + "bounceRate": "0" + }, + { + "pagePath": "/pais_responsaveis/", + "year": "2024", + "month": "01", + "activeUsers": "1", + "screenPageViews": "1", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "56.718541", + "bounceRate": "0" + }, + { + "pagePath": "/timeline/", + "year": "2024", + "month": "01", + "activeUsers": "1", + "screenPageViews": "1", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "36.877747", + "bounceRate": "0" + }, + { + "pagePath": "/trilhas/github-starter/;", + "year": "2023", + "month": "11", + "activeUsers": "1", + "screenPageViews": "4", + "screenPageViewsPerSession": "4", + "screenPageViewPerUser": "4", + "averageSessionDuration": "84.764838", + "bounceRate": "0" + }, + { + "pagePath": "/trilhas/html5/", + "year": "2024", + "month": "01", + "activeUsers": "1", + "screenPageViews": "1", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "9.104028", + "bounceRate": "0" + }, + { + "pagePath": "/trilhas/produtos-digitais/", + "year": "2023", + "month": "11", + "activeUsers": "1", + "screenPageViews": "1", + "screenPageViewsPerSession": "1", + "screenPageViewPerUser": "1", + "averageSessionDuration": "1.262087", + "bounceRate": "0" + } +] \ No newline at end of file diff --git a/data/website_dimensions_info.json b/data/website_dimensions_info.json new file mode 100644 index 0000000..81a9eb3 --- /dev/null +++ b/data/website_dimensions_info.json @@ -0,0 +1,23 @@ +{ + "new": 35230, + "google": 38854, + "returning": 5556, + "(direct)": 3486, + "(not set)": 1892, + "github.com": 50, + "l.instagram.com": 39, + "bing": 13, + "t.co": 15, + "giscus.app": 13, + "codaqui.dev": 19, + "linkedin.com": 14, + "classroom.google.com": 10, + "lnkd.in": 3, + "": 1, + "devpr.org": 2, + "groups.google.com": 2, + "mail.google.com": 2, + "blog.syma.com.br": 1, + 
"redeglobo.globo.com": 1, + "web.telegram.org": 1 +} \ No newline at end of file diff --git a/data/website_info.json b/data/website_info.json new file mode 100644 index 0000000..08c44e3 --- /dev/null +++ b/data/website_info.json @@ -0,0 +1,9 @@ +[ + { + "activeUsers": "35795", + "screenPageViews": "58331", + "averageSessionDuration": "110.75267276175136", + "bounceRate": "0.39766827368811425", + "sessions": "43144" + } +] \ No newline at end of file diff --git a/google-analytics/report.py b/google-analytics/report.py new file mode 100644 index 0000000..ce9ea30 --- /dev/null +++ b/google-analytics/report.py @@ -0,0 +1,247 @@ +import asyncio +import os +import pandas as pd +import json +from google.analytics.data_v1beta import BetaAnalyticsDataAsyncClient +from google.analytics.data_v1beta.types import ( + DateRange, + Dimension, + Metric, + RunReportRequest, +) +# Google Drive APIs +from google_auth_oauthlib.flow import InstalledAppFlow +from google.auth.transport.requests import Request +from googleapiclient.discovery import build +from googleapiclient.http import MediaFileUpload +import pickle +# Getting the date for prepation to Github Actions +from datetime import datetime +from dateutil.relativedelta import relativedelta + +now = datetime.now() + +LAST_MONTH = (now.replace(day=1) - relativedelta(days=1)).strftime('%Y-%m-%d') + +PROPERTY_ID = 337372858 + +os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'google-analytics\service_account.json' + +client = BetaAnalyticsDataAsyncClient() + +async def sample_run_report(property_id = PROPERTY_ID): + client = BetaAnalyticsDataAsyncClient() + + # Request for page-specific data + page_request = RunReportRequest( + property= f"properties/{PROPERTY_ID}", + dimensions=[ + Dimension(name='pagePath'), + Dimension(name='year'), + Dimension(name='month'), + ], + metrics=[ + Metric(name='activeUsers'), + Metric(name='screenPageViews'), + Metric(name='screenPageViewsPerSession'), + Metric(name='screenPageViewsPerUser'), + 
Metric(name='averageSessionDuration'), + Metric(name='bounceRate'), + ], + date_ranges=[DateRange(start_date="2020-01-01", end_date=LAST_MONTH)], + ) + + # Request for website-wide data + website_request = RunReportRequest( + property= f"properties/{PROPERTY_ID}", + metrics=[ + Metric(name='activeUsers'), + Metric(name='screenPageViews'), + Metric(name='averageSessionDuration'), + Metric(name='bounceRate'), + Metric(name='sessions'), + ], + date_ranges=[DateRange(start_date="2020-01-01", end_date="today")], + ) + + # Request for website-wide data with dimensions + website_dimensions_request = RunReportRequest( + property= f"properties/{PROPERTY_ID}", + dimensions=[ + Dimension(name='newVsReturning'), + Dimension(name='sessionSource'), + ], + metrics=[Metric(name='sessions')], + date_ranges=[DateRange(start_date="2020-01-01", end_date="today")], + ) + + + page_response = await client.run_report(page_request) + website_response = await client.run_report(website_request) + website_dimensions_response = await client.run_report(website_dimensions_request) + + print("Report result:") + pages_info = [] + for row in page_response.rows: + page_path = row.dimension_values[0].value + year = row.dimension_values[1].value + month = row.dimension_values[2].value + active_users = row.metric_values[0].value + screen_page_views = row.metric_values[1].value + screen_page_views_session = row.metric_values[2].value + screen_page_views_user = row.metric_values[3].value + avg_session_duration = row.metric_values[4].value + bounce_rate = row.metric_values[5].value + + if page_path != "/": + pages_info.append({ + "pagePath": page_path, + "year": year, + "month": month, + "activeUsers": active_users, + "screenPageViews": screen_page_views, + "screenPageViewsPerSession": screen_page_views_session, + "screenPageViewPerUser": screen_page_views_user, + "averageSessionDuration": avg_session_duration, + "bounceRate": bounce_rate, + }) + + with open("data/pages_info.json", "w") as f: + 
f.write(json.dumps(pages_info, indent = 4)) + + website_info = [] + for row in website_response.rows: + active_users = row.metric_values[0].value + screen_page_views = row.metric_values[1].value + avg_session_duration = row.metric_values[2].value + bounce_rate = row.metric_values[3].value + sessions = row.metric_values[4].value + + website_info.append({ + "activeUsers": active_users, + "screenPageViews": screen_page_views, + "averageSessionDuration": avg_session_duration, + "bounceRate": bounce_rate, + "sessions": sessions + }) + + website_dimensions_info = {} + for row in website_dimensions_response.rows: + new_vs_returning = row.dimension_values[0].value + session_source = row.dimension_values[1].value + sessions = int(row.metric_values[0].value) + + # Aggregate sessions by newVsReturning and sessionSource + if new_vs_returning not in website_dimensions_info: + website_dimensions_info[new_vs_returning] = sessions + else: + website_dimensions_info[new_vs_returning] += sessions + + if session_source not in website_dimensions_info: + website_dimensions_info[session_source] = sessions + else: + website_dimensions_info[session_source] += sessions + + with open("data/website_info.json", "w") as f: + f.write(json.dumps(website_info, indent = 4)) + + with open("data/website_dimensions_info.json", "w") as f: + f.write(json.dumps(website_dimensions_info, indent = 4)) + + # Request for earliest recorded data + earliest_data_request = RunReportRequest( + property= f"properties/{PROPERTY_ID}", + dimensions=[ + Dimension(name='date'), + ], + metrics=[Metric(name='sessions')], # Placeholder metric + date_ranges=[DateRange(start_date="2020-01-01", end_date="today")], # Large date range +) + earliest_data_response = await client.run_report(earliest_data_request) + # Get the earliest date from the first row + earliest_date = earliest_data_response.rows[0].dimension_values[0].value + print(f"Data started being recorded on {earliest_date}") + +# Run the async function 
# The path to your OAuth 2.0 credentials
CREDENTIALS_FILE = 'google-analytics/oauth_credentials.json'

# The path to the token file
TOKEN_FILE = 'google-analytics/token.pickle'

# The scopes that your application needs access to
SCOPES = ['https://www.googleapis.com/auth/drive.file']

FOLDER_MIMETYPE = 'application/vnd.google-apps.folder'


def _drive_query_literal(value):
    """Quote *value* for a Drive ``q`` expression, escaping ``\\`` and ``'``."""
    escaped = value.replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


def _load_drive_credentials():
    """Return valid Drive credentials, refreshing or re-authorizing when needed.

    The token cached in ``TOKEN_FILE`` is reused when still valid. Otherwise it
    is refreshed, or the interactive OAuth flow is run with
    ``CREDENTIALS_FILE``; the resulting token is written back to ``TOKEN_FILE``.
    """
    creds = None

    # NOTE(security): pickle can execute arbitrary code while loading; only
    # ever point TOKEN_FILE at a file this script wrote itself.
    if os.path.exists(TOKEN_FILE):
        with open(TOKEN_FILE, 'rb') as token:
            creds = pickle.load(token)

    if creds and creds.valid:
        return creds

    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
    else:
        # No usable token: let the user log in through the browser.
        flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_FILE, SCOPES)
        creds = flow.run_local_server(port=0)

    # Save the credentials for the next run
    with open(TOKEN_FILE, 'wb') as token:
        pickle.dump(creds, token)
    return creds


# Function to upload a file to Google Drive
def upload_file_to_drive(filename, mimetype, title, folder_name):
    """Upload *filename* to Google Drive as *title* inside folder *folder_name*.

    The folder is created when no matching (non-trashed) folder exists. If a
    file named *title* already exists in that folder its content is replaced;
    otherwise a new file is created.

    Args:
        filename: Local path of the file to upload.
        mimetype: MIME type sent with the upload.
        title: Name of the file on Drive.
        folder_name: Name of the destination Drive folder.
    """
    drive_service = build('drive', 'v3', credentials=_load_drive_credentials())
    files_api = drive_service.files()

    # Search for the folder. Names are escaped so a quote in the name cannot
    # break the query, and trashed items are ignored.
    folder_query = (
        f"name={_drive_query_literal(folder_name)} "
        f"and mimeType='{FOLDER_MIMETYPE}' and trashed=false"
    )
    folders = files_api.list(
        q=folder_query,
        spaces='drive',
        fields='files(id, name)').execute().get('files', [])

    # If the folder exists, use it. If not, create it.
    if folders:
        folder_id = folders[0].get('id')
        print(f'Folder ID: {folder_id} (existing folder)')
    else:
        folder_metadata = {'name': folder_name, 'mimeType': FOLDER_MIMETYPE}
        folder_id = files_api.create(body=folder_metadata, fields='id').execute().get('id')
        print(f'Folder ID: {folder_id}')

    # Search for the file in the folder
    file_query = (
        f"name={_drive_query_literal(title)} "
        f"and '{folder_id}' in parents and trashed=false"
    )
    existing = files_api.list(
        q=file_query,
        spaces='drive',
        fields='files(id, name)').execute().get('files', [])

    # If the file exists, update it. If not, create it.
    if existing:
        media = MediaFileUpload(filename, mimetype=mimetype, resumable=True)
        updated_file = files_api.update(
            fileId=existing[0].get('id'),
            media_body=media).execute()
        print(f'File ID: {updated_file.get("id")} (updated file)')
    else:
        file_metadata = {'name': title, 'parents': [folder_id]}
        media = MediaFileUpload(filename, mimetype=mimetype)
        created_file = files_api.create(
            body=file_metadata, media_body=media, fields='id').execute()
        print(f'File ID: {created_file.get("id")} (new file)')


if __name__ == "__main__":
    # Run the async report, then upload the per-page report to Google Drive.
    asyncio.run(sample_run_report())
    upload_file_to_drive("data/pages_info.json", "application/json", "pages_info.json", "Dados-Codaqui")
google_auth_oauthlib.flow import InstalledAppFlow +from google.auth.transport.requests import Request +from googleapiclient.discovery import build +from googleapiclient.http import MediaFileUpload +import pickle +# Getting the date for prepation to Github Actions +from datetime import datetime +from dateutil.relativedelta import relativedelta +# OS to find files, but now also to get environment variables from github repository +import os +import json +# Convert back the TOKEN.PICKLE and load it +import base64 +import io + +# Get environment variables +service_account_credentials = os.getenv('SERVICE_ACCOUNT_CREDENTIALS') +property_id = os.getenv('PROPERTY_ID') +token_pickle_base64 = os.getenv('TOKEN_PICKLE') +oauth_credentials = os.getenv('OAUTH_CREDENTIALS') + +# Convert credentials and token from JSON and base64 to Python objects +service_account_credentials = json.loads(service_account_credentials) +oauth_credentials = json.loads(oauth_credentials) +token_pickle = base64.b64decode(token_pickle_base64) +token = pickle.loads(token_pickle) +#token = pickle.loads(io.BytesIO(token_pickle)) + +# Get the date of the last month +now = datetime.now() +LAST_MONTH = (now.replace(day=1) - relativedelta(days=1)).strftime('%Y-%m-%d') + +# Set up constants +PROPERTY_ID = property_id +CREDENTIALS_FILE = oauth_credentials +TOKEN_FILE = token +SCOPES = ['https://www.googleapis.com/auth/drive.file'] + +# Set up Google Analytics client +credentials = Credentials.from_service_account_info(service_account_credentials) +#credentials = AnonymousCredentials.from_service_account_info(service_account_credentials) +client = BetaAnalyticsDataAsyncClient(credentials=credentials) + +async def sample_run_report(property_id = PROPERTY_ID): + + credentials = Credentials.from_service_account_info(service_account_credentials) + client = BetaAnalyticsDataAsyncClient(credentials=credentials) + + # Request for page-specific data + page_request = RunReportRequest( + property= 
f"properties/{PROPERTY_ID}", + dimensions=[ + Dimension(name='pagePath'), + Dimension(name='year'), + Dimension(name='month'), + ], + metrics=[ + Metric(name='activeUsers'), + Metric(name='screenPageViews'), + Metric(name='screenPageViewsPerSession'), + Metric(name='screenPageViewsPerUser'), + Metric(name='averageSessionDuration'), + Metric(name='bounceRate'), + ], + date_ranges=[DateRange(start_date="2020-01-01", end_date=LAST_MONTH)], + ) + + # Request for website-wide data + website_request = RunReportRequest( + property= f"properties/{PROPERTY_ID}", + metrics=[ + Metric(name='activeUsers'), + Metric(name='screenPageViews'), + Metric(name='averageSessionDuration'), + Metric(name='bounceRate'), + Metric(name='sessions'), + ], + date_ranges=[DateRange(start_date="2020-01-01", end_date="today")], + ) + + # Request for website-wide data with dimensions + website_dimensions_request = RunReportRequest( + property= f"properties/{PROPERTY_ID}", + dimensions=[ + Dimension(name='newVsReturning'), + Dimension(name='sessionSource'), + ], + metrics=[Metric(name='sessions')], + date_ranges=[DateRange(start_date="2020-01-01", end_date="today")], + ) + + + # Run requests and get responses + page_response = await client.run_report(page_request) + website_response = await client.run_report(website_request) + website_dimensions_response = await client.run_report(website_dimensions_request) + + # Process responses and save data + print("Report result:") + pages_info = [] + for row in page_response.rows: + page_path = row.dimension_values[0].value + year = row.dimension_values[1].value + month = row.dimension_values[2].value + active_users = row.metric_values[0].value + screen_page_views = row.metric_values[1].value + screen_page_views_session = row.metric_values[2].value + screen_page_views_user = row.metric_values[3].value + avg_session_duration = row.metric_values[4].value + bounce_rate = row.metric_values[5].value + + if page_path != "/": + pages_info.append({ + "pagePath": 
page_path, + "year": year, + "month": month, + "activeUsers": active_users, + "screenPageViews": screen_page_views, + "screenPageViewsPerSession": screen_page_views_session, + "screenPageViewPerUser": screen_page_views_user, + "averageSessionDuration": avg_session_duration, + "bounceRate": bounce_rate, + }) + + with open("data/pages_info.json", "w") as f: + f.write(json.dumps(pages_info, indent = 4)) + + website_info = [] + for row in website_response.rows: + active_users = row.metric_values[0].value + screen_page_views = row.metric_values[1].value + avg_session_duration = row.metric_values[2].value + bounce_rate = row.metric_values[3].value + sessions = row.metric_values[4].value + + website_info.append({ + "activeUsers": active_users, + "screenPageViews": screen_page_views, + "averageSessionDuration": avg_session_duration, + "bounceRate": bounce_rate, + "sessions": sessions + }) + + website_dimensions_info = {} + for row in website_dimensions_response.rows: + new_vs_returning = row.dimension_values[0].value + session_source = row.dimension_values[1].value + sessions = int(row.metric_values[0].value) + + # Aggregate sessions by newVsReturning and sessionSource + if new_vs_returning not in website_dimensions_info: + website_dimensions_info[new_vs_returning] = sessions + else: + website_dimensions_info[new_vs_returning] += sessions + + if session_source not in website_dimensions_info: + website_dimensions_info[session_source] = sessions + else: + website_dimensions_info[session_source] += sessions + + with open("data/website_info.json", "w") as f: + f.write(json.dumps(website_info, indent = 4)) + + with open("data/website_dimensions_info.json", "w") as f: + f.write(json.dumps(website_dimensions_info, indent = 4)) + + # Request for earliest recorded data + earliest_data_request = RunReportRequest( + property= f"properties/{PROPERTY_ID}", + dimensions=[ + Dimension(name='date'), + ], + metrics=[Metric(name='sessions')], # Placeholder metric + 
def upload_file_to_drive(filename, mimetype, title, folder_name, creds):
    """Upload a local file to a named Google Drive folder, creating or updating it.

    The folder is looked up by name and created when no match is found. If a
    file called ``title`` already exists in that folder its content is
    replaced; otherwise a new file is created inside the folder. The folder
    and file IDs are printed; nothing is returned.

    Args:
        filename: Path of the local file to upload.
        mimetype: MIME type handed to ``MediaFileUpload``.
        title: Name the file has (or will get) on Drive.
        folder_name: Name of the Drive folder to upload into.
        creds: Credentials passed to ``build``. The object must expose
            ``valid``, ``expired``, ``refresh_token`` and ``refresh()``
            (presumably google-auth credentials -- confirm against the
            caller). A falsy value triggers the interactive OAuth flow.
    """

    def _quote(value):
        # Drive query literals are single-quoted; backslashes and single
        # quotes inside them must be backslash-escaped, otherwise a name such
        # as "Valentine's Day" produces a malformed query.
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    # If there are no (valid) credentials available, refresh them when
    # possible, otherwise let the user log in through the local OAuth flow.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_FILE, SCOPES)
            creds = flow.run_local_server(port=0)
        # NOTE: refreshed/new credentials are not persisted anywhere, so the
        # next run starts again from whatever the caller passes in.

    # Build the Drive service
    drive_service = build('drive', 'v3', credentials=creds)

    # Search for the folder. Trashed items are excluded so the upload never
    # lands in a folder that sits in the bin.
    folder_query = (
        f"name = {_quote(folder_name)} "
        "and mimeType = 'application/vnd.google-apps.folder' "
        "and trashed = false"
    )
    response = drive_service.files().list(
        q=folder_query,
        spaces='drive',
        fields='files(id, name)').execute()
    folders = response.get('files', [])

    # If the folder exists, use the first match. If not, create it.
    if folders:
        folder_id = folders[0].get('id')
        print(f'Folder ID: {folder_id} (existing folder)')
    else:
        folder_metadata = {
            'name': folder_name,
            'mimeType': 'application/vnd.google-apps.folder',
        }
        created_folder = drive_service.files().create(
            body=folder_metadata, fields='id').execute()
        folder_id = created_folder.get('id')
        print(f'Folder ID: {folder_id}')

    # Search for the file in the folder (again ignoring trashed entries, so a
    # deleted copy is never silently "updated").
    file_query = (
        f"name = {_quote(title)} "
        f"and '{folder_id}' in parents "
        "and trashed = false"
    )
    response = drive_service.files().list(
        q=file_query,
        spaces='drive',
        fields='files(id, name)').execute()
    matches = response.get('files', [])

    # If the file exists, update its content. If not, create it.
    if matches:
        media = MediaFileUpload(filename, mimetype=mimetype, resumable=True)
        updated_file = drive_service.files().update(
            fileId=matches[0].get('id'),
            media_body=media).execute()
        print(f'File ID: {updated_file.get("id")} (updated file)')
    else:
        file_metadata = {
            'name': title,
            'parents': [folder_id],
        }
        media = MediaFileUpload(filename, mimetype=mimetype)
        new_file = drive_service.files().create(
            body=file_metadata, media_body=media, fields='id').execute()
        print(f'File ID: {new_file.get("id")} (new file)')
steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Run report + run: python report_github_actions.py + env: + SERVICE_ACCOUNT_CREDENTIALS: ${{ secrets.SERVICE_ACCOUNT_CREDENTIALS }} + PROPERTY_ID: ${{ secrets.PROPERTY_ID }} + TOKEN_PICKLE: ${{ secrets.TOKEN_PICKLE }} + OAUTH_CREDENTIALS: ${{ secrets.OAUTH_CREDENTIALS }} \ No newline at end of file