Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
m-amin-alavian committed Dec 5, 2023
0 parents commit 0bb0eb1
Show file tree
Hide file tree
Showing 17 changed files with 17,724 additions and 0 deletions.
19 changes: 19 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
__pycache__/
dist/
.venv/
.vscode/

*checkpoint.ipynb*
*draft*

*/Data/*
temp/*
fonts/*

*.xlsx
*.pdf
*.log
*.png

settings.yaml
*tokens.toml
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# LFSIR
1 change: 1 addition & 0 deletions docs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# LFSIR
1 change: 1 addition & 0 deletions docs/api/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# LFSIR Main API
9 changes: 9 additions & 0 deletions docs/api/load_table.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@

::: lfsir.api.load_table
handler: python
options:
heading_level: 1
members_order: source
show_root_heading: true
show_signature: false
show_root_toc_entry: true
1 change: 1 addition & 0 deletions docs/tables/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Tables
6,338 changes: 6,338 additions & 0 deletions docs/tables/cleaned/data.md

Large diffs are not rendered by default.

7,338 changes: 7,338 additions & 0 deletions docs/tables/raw/data.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions lfsir/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .api import load_table
117 changes: 117 additions & 0 deletions lfsir/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""LFSIR, Labor Force Survey of Iran, Main API
"""
# pylint: disable=too-many-arguments
# pylint: disable=unused-argument
# pylint: disable=too-many-locals

from pathlib import Path
from typing import Literal

import pandas as pd

from bssir.metadata_reader import config, _Years
from bssir.api import API

# Build the package configuration from the directory containing this module;
# the call yields the `defaults` and `metadata` objects passed to the API below.
defaults, metadata = config.set_package_config(Path(__file__).parent)
# Shared bssir API instance; `load_table` in this module delegates to it.
api = API(defaults=defaults, metadata=metadata)

# Table names accepted by the `table_name` argument of `load_table`.
_Table = Literal["data"]


def __get_optional_params(local_variables: dict) -> dict:
    """Return the entries of ``local_variables`` whose value is not None.

    Only ``None`` is filtered out; other falsy values such as ``0``,
    ``False`` or ``""`` are kept. The input mapping is left untouched and
    the original key order is preserved in the result.
    """
    provided = {}
    for name, setting in local_variables.items():
        if setting is None:
            continue
        provided[name] = setting
    return provided


def load_table(
    table_name: _Table = "data",
    years: _Years = "last",
    form: Literal["normalized", "cleaned", "raw"] | None = None,
    *,
    on_missing: Literal["error", "download", "create"] | None = None,
    redownload: bool | None = None,
    save_downloaded: bool | None = None,
    recreate: bool | None = None,
    save_created: bool | None = None,
) -> pd.DataFrame:
    """Load a table for the given table name and year(s).

    Both the original survey tables and the package tables defined in
    this library can be loaded through this function.

    Original survey tables come in three forms:

    - 'raw' - The raw survey data without any modifications.
    - 'cleaned' - Raw data with added labels and types and with
      irrelevant values removed, but no changes to the actual data.
    - 'normalized' - A standardized form with consistent column names,
      value encodings and table structure across survey years. It also
      adds useful metadata such as table name and year identifiers.

    Package tables are defined in this library to make the data easier
    to work with and are only available in normalized form.

    For more details on available tables, see the documentation
    [tables page](https://iran-open-data.github.io/LFSIR/tables/).

    !!! note

        The `years` parameter accepts different input types:

        - int: A single year integer like 1390 or 90
        - list[int]: A list of integer years [1390, 1395, 1400]
        - str: A hyphenated string range like '1390-1395'
        - "all": A string indicating all available years
        - "last": A string indicating only the most recent year

    Any argument left as ``None`` is not forwarded to the underlying API,
    so the configured package default applies; the defaults listed below
    are the documented ones.

    Parameters
    ----------
    table_name : _Table, default "data"
        The name of the table to load.
    years : _Years, default "last"
        The years of data to load.
    form : {"normalized", "cleaned", "raw"}, default "normalized"
        The form of the data to load.

    Other parameters
    ----------------
    on_missing : {"download", "create", "error"}, default "download"
        What to do when the table is missing: "download" fetches it,
        "create" generates it from raw data, "error" raises an exception.
    redownload : bool, default False
        Whether to re-download the table if it already exists.
    save_downloaded : bool, default True
        Whether to save newly downloaded data.
    recreate : bool, default False
        Whether to recreate the table if it already exists.
    save_created : bool, default True
        Whether to save newly created data.

    Returns
    -------
    pd.DataFrame
        The loaded table as a pandas DataFrame.

    Raises
    ------
    FileNotFoundError
        If data is missing and on_missing='error'.

    Examples
    --------
    >>> import lfsir
    >>> df = lfsir.load_table("data", 1400) # Loads survey data for year 1400
    >>> df.iloc[:5, :5]
           ID  Census_Turn  Alternative_Household  Survey_Taken Survey_Skip_Reason
    0  100141            4                  False          True               None
    1  100141            4                  False          True               None
    2  100142            3                  False          True               None
    3  100142            3                  False          True               None
    4  100142            3                  False          True               None
    """
    # Spell out the arguments in signature order; only those the caller
    # actually set (non-None) are forwarded to the shared API instance.
    candidates = {
        "table_name": table_name,
        "years": years,
        "form": form,
        "on_missing": on_missing,
        "redownload": redownload,
        "save_downloaded": save_downloaded,
        "recreate": recreate,
        "save_created": save_created,
    }
    return api.load_table(**__get_optional_params(candidates))
137 changes: 137 additions & 0 deletions lfsir/metadata/id_information.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@

ID_Length:
1384: 10


ID_Year:
code:
position:
start: 0
end: 2

name:
84: 1384
85: 1385
86: 1386
87: 1387
88: 1388
89: 1389

90: 1390
91: 1391
92: 1392
93: 1393
94: 1394
95: 1395
96: 1396
97: 1397
98: 1398
99: 1399

0: 1400
1: 1401
2: 1402
3: 1403
4: 1404
5: 1405
6: 1406
7: 1407
8: 1408
9: 1409


Province:
code:
position:
start: 2
end: 4

name:
0: "Markazi" # مرکزی
1: "Gilan" # گیلان
2: "Mazandaran" # مازندران
3: "East_Azerbaijan" # آذربایجان شرقی
4: "West_Azerbaijan" # آذربایجان غربی
5: "Kermanshah" # کرمانشاه
6: "Khuzestan" # خوزستان
7: "Fars" # فارس
8: "Kerman" # کرمان
9: "Razavi_Khorasan" # خراسان رضوی
10: "Isfahan" # اصفهان
11: "Sistan_and_Baluchestan" # سیستان و بلوچستان
12: "Kurdistan" # کردستان
13: "Hamadan" # همدان
14: "Chaharmahal_and_Bakhtiari" # چهارمحال و بختیاری
15: "Lorestan" # لرستان
16: "Ilam" # ایلام
17: "Kohgiluyeh_and_Boyer-Ahmad" # کهگیلویه و بویر احمد
18: "Bushehr" # بوشهر
19: "Zanjan" # زنجان
20: "Semnan" # سمنان
21: "Yazd" # یزد
22: "Hormozgan" # هرمزگان
23: "Tehran" # تهران
24: "Ardabil" # اردبیل
25: "Qom" # قم
26: "Qazvin" # قزوین
27: "Golestan" # گلستان
28: "North_Khorasan" # خراسان شمالی
29: "South_Khorasan" # خراسان جنوبی
30: "Alborz" # البرز

farsi_name:
0: "مرکزی"
1: "گیلان"
2: "مازندران"
3: "آذربایجان شرقی"
4: "آذربایجان غربی"
5: "کرمانشاه"
6: "خوزستان"
7: "فارس"
8: "کرمان"
9: "خراسان رضوی"
10: "اصفهان"
11: "سیستان و بلوچستان"
12: "کردستان"
13: "همدان"
14: "چهارمحال و بختیاری"
15: "لرستان"
16: "ایلام"
17: "کهگیلویه و بویر احمد"
18: "بوشهر"
19: "زنجان"
20: "سمنان"
21: "یزد"
22: "هرمزگان"
23: "تهران"
24: "اردبیل"
25: "قم"
26: "قزوین"
27: "گلستان"
28: "خراسان شمالی"
29: "خراسان جنوبی"
30: "البرز"



# Urban/rural flag taken from the household ID (slice start 4, end 5).
# The 1363 / 1387 keys are presumably the first year each code table
# applies to - TODO confirm against the metadata reader.
Urban_Rural:
  code:
    position:
      start: 4
      end: 5

  name:
    1363:
      0: "Rural"
      1: "Urban"
    1387:
      1: "Urban"
      2: "Rural"

  # Farsi labels must describe the same codes as `name` above.
  # The 1387 entries were previously swapped (1 -> rural, 2 -> urban),
  # contradicting the English labels; they now agree with `name`.
  farsi_name:
    1363:
      0: "روستایی"
      1: "شهری"
    1387:
      1: "شهری"
      2: "روستایی"
94 changes: 94 additions & 0 deletions lfsir/metadata/raw_files.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@

1384:
compressed_files:
- name: data.zip
address: https://www.amar.org.ir/Portals/0/amarmozuii/Dade_Kham/LFS/LFSRawData_84_990826.zip

1385:
compressed_files:
- name: data.zip
address: https://www.amar.org.ir/Portals/0/amarmozuii/Dade_Kham/LFS/LFSRawData_85_990826.zip

1386:
compressed_files:
- name: data.zip
address: https://www.amar.org.ir/Portals/0/amarmozuii/Dade_Kham/LFS/LFSRawData_86_990826.zip

1387:
compressed_files:
- name: data.zip
address: https://www.amar.org.ir/Portals/0/amarmozuii/Dade_Kham/LFS/LFSRawData_87_990826.zip

1388:
compressed_files:
- name: data.zip
address: https://www.amar.org.ir/Portals/0/amarmozuii/Dade_Kham/LFS/LFSRawData_88_990826.zip

1389:
compressed_files:
- name: data.zip
address: https://www.amar.org.ir/Portals/0/amarmozuii/Dade_Kham/LFS/LFSRawData_89_990826.zip


1390:
compressed_files:
- name: data.zip
address: https://www.amar.org.ir/Portals/0/amarmozuii/Dade_Kham/LFS/LFSRawData_90_990826.zip

1391:
compressed_files:
- name: data.zip
address: https://www.amar.org.ir/Portals/0/amarmozuii/Dade_Kham/LFS/LFSRawData_91_990826.zip

1392:
compressed_files:
- name: data.zip
address: https://www.amar.org.ir/Portals/0/amarmozuii/Dade_Kham/LFS/LFSRawData_92_990826.zip

1393:
compressed_files:
- name: data.zip
address: https://www.amar.org.ir/Portals/0/amarmozuii/Dade_Kham/LFS/LFSRawData_93_990826.zip

1394:
compressed_files:
- name: data.zip
address: https://www.amar.org.ir/Portals/0/amarmozuii/Dade_Kham/LFS/LFSRawData_94_990826.zip

1395:
compressed_files:
- name: data.zip
address: https://www.amar.org.ir/Portals/0/amarmozuii/Dade_Kham/LFS/LFSRawData_95_990826.zip

1396:
compressed_files:
- name: data.rar
address: https://www.amar.org.ir/Portals/0/amarmozuii/Dade_Kham/LFS96_RawData.rar

1397:
compressed_files:
- name: data.rar
address: https://www.amar.org.ir/Portals/0/amarmozuii/Dade_Kham/LFS97_RawData.rar

1398:
compressed_files:
- name: data.rar
address: https://www.amar.org.ir/Portals/0/amarmozuii/niruyekar/lfs98_raw%20data990721.rar

1399:
compressed_files:
- name: data.rar
address: https://www.amar.org.ir/Portals/0/amarmozuii/niruyekar/LFS_RawData99_14000615.rar


1400:
compressed_files:
- name: data.zip
address: https://www.amar.org.ir/Portals/0/amarmozuii/niruyekar/LFS_RawData1400_14010130.zip

1401:
compressed_files:
- name: data.zip
address: https://www.amar.org.ir/Portals/0/amarmozuii/niruyekar/LFS_RawData1401_14020712.zip


5 changes: 5 additions & 0 deletions lfsir/metadata/schema.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@


data:
instructions:
- add_year
Loading

0 comments on commit 0bb0eb1

Please sign in to comment.