display rome-naf affinity coefficient in data backoffice

StartupsPoleEmploi · Nov 17, 2017 · 3c1103f · 3c1103f
1 parent 13015de
commit 3c1103f
Show file tree

Hide file tree

Showing 3 changed files with 41 additions and 17 deletions.
diff --git a/labonneboite/common/mapping.py b/labonneboite/common/mapping.py
@@ -90,6 +90,21 @@ def get_total_naf_hirings(naf):
     return sum(MANUAL_NAF_ROME_MAPPING[naf][rome] for rome in romes)
 
 
+@lru_cache(maxsize=32*1024)
+def get_affinity_between_rome_and_naf(rome_code, naf_code):
+    """
+    Share of this NAF's total hirings that were made for this ROME, as a float in (0, 1].
+    """
+    current_rome_hirings = MANUAL_NAF_ROME_MAPPING[naf_code][rome_code]
+    total_naf_hirings = get_total_naf_hirings(naf_code)
+
+    if not (current_rome_hirings >= 1 and current_rome_hirings <= total_naf_hirings):
+        raise Exception("error in hiring data for rome_code=%s and naf_code=%s" % (rome_code, naf_code))
+
+    # 1.0 forces a float result; otherwise int/int such as 30/100 would give zero (Python 2 integer division)
+    return 1.0 * current_rome_hirings / total_naf_hirings
+
+
 class Rome2NafMapper(object):
 
     def __init__(self):
@@ -113,6 +128,7 @@ def map(self, rome_codes, optional_naf_codes=None):
                     naf_codes.add(naf)
         return list(naf_codes)
 
+    @lru_cache(maxsize=1024)  # about 700 naf_codes
     def romes_for_naf(self, naf):
         """
         Returns ROME codes matching the given NAF code as a list of named tuples ordered by the number of hires.
@@ -129,21 +145,27 @@ def romes_for_naf(self, naf):
         Rome = namedtuple('Rome', ['code', 'name', 'nafs'])
         return [Rome(rome[0], settings.ROME_DESCRIPTIONS[rome[0]], rome[1]) for rome in romes_for_naf]
 
+    @lru_cache(maxsize=8*1024)  # about 500 rome_codes in current dataset and 5000 in sliced dataset
     def nafs_for_rome(self, rome):
         """
         Returns NAF codes matching the given ROME code as a list of named tuples ordered by the number of hires.
         E.g. for ROME M1607:
         [
-            Naf(code='8810A', name=u'Aide à domicile', hirings=2830),
-            Naf(code='6831Z', name=u'Agences immobilières', hirings=897),
-            Naf(code='8422Z', name=u'Défense', hirings=6),
+            Naf(code='8810A', name=u'Aide à domicile', hirings=2830, affinity=0.04),
+            Naf(code='6831Z', name=u'Agences immobilières', hirings=897, affinity=0.08),
+            Naf(code='8422Z', name=u'Défense', hirings=6, affinity=0.20),
             ...
         ]
         """
         nafs = self.rome_2_naf_dict.get(rome, {})
         nafs = sorted(nafs.items(), key=lambda (k, v): v, reverse=True)
-        Naf = namedtuple('Naf', ['code', 'name', 'hirings'])
-        return [Naf(naf[0], settings.NAF_CODES[naf[0]], naf[1]) for naf in nafs]
+        Naf = namedtuple('Naf', ['code', 'name', 'hirings', 'affinity'])
+        return [Naf(
+            naf[0],
+            settings.NAF_CODES[naf[0]],
+            naf[1],
+            get_affinity_between_rome_and_naf(rome, naf[0]),
+            ) for naf in nafs]
 
     @staticmethod
     def romes_is_valid(rome):

diff --git a/labonneboite/common/scoring.py b/labonneboite/common/scoring.py
@@ -49,9 +49,11 @@ def _get_score_from_hirings(hirings, as_float=False):
     elif hirings >= settings.SCORE_100_HIRINGS:
         score = 100
     elif hirings <= settings.SCORE_60_HIRINGS:
-        score = 50 + 10 * (hirings - settings.SCORE_50_HIRINGS) / (settings.SCORE_60_HIRINGS - settings.SCORE_50_HIRINGS)
+        score = (50 + 10 * (hirings - settings.SCORE_50_HIRINGS) /
+            (settings.SCORE_60_HIRINGS - settings.SCORE_50_HIRINGS))
     elif hirings <= settings.SCORE_80_HIRINGS:
-        score = 60 + 20 * (hirings - settings.SCORE_60_HIRINGS) / (settings.SCORE_80_HIRINGS - settings.SCORE_60_HIRINGS)
+        score = (60 + 20 * (hirings - settings.SCORE_60_HIRINGS) /
+            (settings.SCORE_80_HIRINGS - settings.SCORE_60_HIRINGS))
     elif hirings <= settings.SCORE_100_HIRINGS:
         score = 80 + 20.0/math.log10(settings.SCORE_100_HIRINGS) * math.log10(1 + hirings - settings.SCORE_80_HIRINGS)
     else:
@@ -84,9 +86,11 @@ def get_hirings_from_score(score):
     if score <= 50:
         hirings = settings.SCORE_50_HIRINGS * score / 50.0
     elif score <= 60:
-        hirings = settings.SCORE_50_HIRINGS + (score - 50) / 10.0 * (settings.SCORE_60_HIRINGS - settings.SCORE_50_HIRINGS)
+        hirings = (settings.SCORE_50_HIRINGS +
+            (score - 50) / 10.0 * (settings.SCORE_60_HIRINGS - settings.SCORE_50_HIRINGS))
     elif score <= 80:
-        hirings = settings.SCORE_60_HIRINGS + (score - 60) / 20.0 * (settings.SCORE_80_HIRINGS - settings.SCORE_60_HIRINGS)
+        hirings = (settings.SCORE_60_HIRINGS +
+            (score - 60) / 20.0 * (settings.SCORE_80_HIRINGS - settings.SCORE_60_HIRINGS))
     elif score <= 100:
         hirings = -1 + settings.SCORE_80_HIRINGS + 10.0 ** ((score-80) / 20.0 * math.log10(settings.SCORE_100_HIRINGS))
     else:
@@ -110,14 +114,8 @@ def get_score_adjusted_to_rome_code_and_naf_code(score, rome_code, naf_code):
         return score
 
     total_office_hirings = get_hirings_from_score(score)
-    total_naf_hirings = mapping_util.get_total_naf_hirings(naf_code)
-    current_rome_hirings = mapping_util.MANUAL_NAF_ROME_MAPPING[naf_code][rome_code]
-
-    if not (current_rome_hirings >= 1 and current_rome_hirings <= total_naf_hirings):
-        raise Exception("error in hiring data for rome_code=%s and naf_code=%s" % (rome_code, naf_code))
-
-    # 1.0 used to force float result, otherwise int/int like 30/100 give... zero
-    office_hirings_for_current_rome = total_office_hirings * (1.0 * current_rome_hirings / total_naf_hirings)
+    affinity = mapping_util.get_affinity_between_rome_and_naf(rome_code, naf_code)
+    office_hirings_for_current_rome = total_office_hirings * affinity
 
     # result should be integer
     return get_score_from_hirings(office_hirings_for_current_rome, as_float=False)
diff --git a/labonneboite/web/templates/data/nafs_for_rome.html b/labonneboite/web/templates/data/nafs_for_rome.html
@@ -24,6 +24,7 @@ <h2>Codes NAF associés à un code ROME</h2>
             <br>
             <span class="badge badge-info">{{ total_hirings_for_rome }}</span>
           </th>
+          <th>Coefficient d'affinité: pourcentage des recrutements de ce NAF qui sont faits avec ce ROME</th>
           <th> </th>
         </tr>
       </thead>
@@ -38,6 +39,9 @@ <h2>Codes NAF associés à un code ROME</h2>
             <td>
               {{ (naf.hirings / total_hirings_for_rome * 100) | round(2) }} %
             </td>
+            <td>
+              {{ (naf.affinity * 100) | round(2) }} %
+            </td>
             <td>
               <small><a href="{{ url_for('data.romes_for_naf', naf=naf.code) }}">ROME associés</a></small>
             </td>