#23 fix k means

jphacks · Nov 16, 2024 · 12e7bab · 12e7bab
1 parent 613441e
commit 12e7bab
Show file tree

Hide file tree

Showing 2 changed files with 77 additions and 15 deletions.
diff --git a/admin_panel/func.py b/admin_panel/func.py
@@ -1,5 +1,8 @@
 from sklearn.cluster import KMeans
 import pandas as pd
+from pyclustering.cluster.xmeans import xmeans
+from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
+import warnings
 
 
 def load_data(garbage_csv_path, garbage_cans_csv_path):
@@ -19,3 +22,23 @@ def perform_kmeans_clustering(df, num_clusters):
     kmeans.fit(coordinates)
     centroids = kmeans.cluster_centers_
     return centroids
+
+
+def perform_xmeans_clustering(df, num_clusters=3):
+    """Run X-Means clustering and return the centre of each cluster.
+
+    Args:
+        df: DataFrame with "latitude" and "longitude" columns. Rows that
+            have NaN in either column are ignored.
+        num_clusters: Not used by this function; kept so the signature
+            stays unchanged for existing callers.
+            NOTE(review): presumably meant as an upper bound on the number
+            of clusters (the xmeans ``kmax`` argument) - confirm before
+            wiring it in, since that would change the default results.
+
+    Returns:
+        A list with one ``[latitude, longitude]`` array per cluster (the
+        mean of the points assigned to that cluster). The list is empty
+        when no usable rows remain.
+    """
+    coordinates = df[["latitude", "longitude"]].dropna().to_numpy()
+
+    # The seeding below asks for 2 initial centres, which needs at least
+    # 2 points; handle shorter inputs here instead of letting it raise.
+    if len(coordinates) == 0:
+        return []
+    if len(coordinates) == 1:
+        return [coordinates[0]]
+
+    initial_centers = kmeans_plusplus_initializer(coordinates, 2).initialize()
+
+    xmeans_instance = xmeans(coordinates, initial_centers)
+
+    # Temporarily silence the "All-NaN slice/axis" warnings raised while
+    # the clustering runs.
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", message="All-NaN (slice|axis) encountered")
+        xmeans_instance.process()
+
+    # get_clusters() yields lists of row indices into `coordinates`.
+    clusters = xmeans_instance.get_clusters()
+
+    # Skip empty clusters: their mean would be NaN, which is not a usable
+    # map position.
+    centroids = [
+        coordinates[cluster].mean(axis=0)
+        for cluster in clusters
+        if len(cluster) > 0
+    ]
+
+    return centroids
diff --git a/admin_panel/streamlit.py b/admin_panel/streamlit.py
@@ -4,10 +4,16 @@
 from streamlit_folium import st_folium
 import plotly.express as px
 import alter as alt
-from func import load_data, perform_kmeans_clustering
+from func import load_data, perform_kmeans_clustering, perform_xmeans_clustering
+
 
 # ページ設定
-st.set_page_config(page_title="ゴミ＆ゴミ箱 管理パネル", page_icon="🗑️", layout="wide", initial_sidebar_state="expanded")
+st.set_page_config(
+    page_title="ゴミ＆ゴミ箱 管理パネル",
+    page_icon="🗑️",
+    layout="wide",
+    initial_sidebar_state="expanded",
+)
 
 # サイドバーのスタイリング
 st.sidebar.title("ゴミ＆ゴミ箱 管理パネル")
@@ -67,7 +73,9 @@
 # データのロード
 garbage_csv_path = "./sample_garbage.csv"
 garbage_cans_csv_path = "./sample_cans.csv"
-df_gb, df_gbcans = load_data(garbage_csv_path=garbage_csv_path, garbage_cans_csv_path=garbage_cans_csv_path)
+df_gb, df_gbcans = load_data(
+    garbage_csv_path=garbage_csv_path, garbage_cans_csv_path=garbage_cans_csv_path
+)
 
 # カラム設定（カラム2を最も太くする）
 col1, col2, col3 = st.columns([1, 3, 1])
@@ -78,42 +86,69 @@
     if not df_gb.empty:
         discarded_counts = df_gb["is_discarded"].value_counts()
         discarded_data = pd.DataFrame(
-            {"Status": ["Discarded", "Not Discarded"], "Count": [discarded_counts.get(1, 0), discarded_counts.get(0, 0)]}
+            {
+                "Status": ["Discarded", "Not Discarded"],
+                "Count": [discarded_counts.get(1, 0), discarded_counts.get(0, 0)],
+            }
         )
 
-        fig = px.pie(discarded_data, names="Status", values="Count", hole=0.3, title="捨てられたゴミの割合")
+        fig = px.pie(
+            discarded_data,
+            names="Status",
+            values="Count",
+            hole=0.3,
+            title="捨てられたゴミの割合",
+        )
         st.plotly_chart(fig)
 
         # Calculate percentage change for discarded garbage (last month vs current month)
         df_gb["month"] = pd.to_datetime(df_gb["created_at"]).dt.to_period("M")
         current_month = df_gb["month"].max()
         last_month = current_month - 1
 
-        current_month_count = df_gb[df_gb["month"] == current_month]["is_discarded"].sum()
+        current_month_count = df_gb[df_gb["month"] == current_month][
+            "is_discarded"
+        ].sum()
         last_month_count = (
-            df_gb[df_gb["month"] == last_month]["is_discarded"].sum() if last_month in df_gb["month"].values else 0
+            df_gb[df_gb["month"] == last_month]["is_discarded"].sum()
+            if last_month in df_gb["month"].values
+            else 0
         )
 
         if last_month_count > 0:
-            change_percentage = ((current_month_count - last_month_count) / last_month_count) * 100
+            change_percentage = (
+                (current_month_count - last_month_count) / last_month_count
+            ) * 100
         else:
             change_percentage = 0
 
         st.write(f"捨てられたゴミの先月比: {change_percentage:.2f}%")
 
 # カラム2: 地図とKMeansクラスタリング
+from streamlit_folium import st_folium
+
+# 地図を表示する処理
 with col2:
     st.markdown("#### Map")
-    # 地図の中心座標（データの平均位置を使う）
+
+    # クラスタリングの数を入力
+    num_clusters = st.number_input(
+        "クラスタリングの数を入力してください",
+        min_value=1,
+        max_value=10,
+        value=4,
+        step=1,
+    )
+
     if not df_gb.empty:
-        center_lat = df_gb["latitude"].mean()  # 緯度（latitude列を使用）
-        center_lon = df_gb["longitude"].mean()  # 経度（longitude列を使用）
+        center_lat = df_gb["latitude"].median()  # 緯度（latitude列を使用）
+        center_lon = df_gb["longitude"].median()  # 経度（longitude列を使用）
     else:
         center_lat = 35.6895
         center_lon = 139.6917
 
     # 地図を作成
-    m = folium.Map(location=[center_lat, center_lon], zoom_start=12)
+    m = folium.Map(location=[center_lat, center_lon], zoom_start=13)
 
     # ゴミ箱の位置を地図に表示
     if not df_gbcans.empty:
@@ -144,9 +179,10 @@
                 icon=folium.Icon(color="blue", icon="arrow-down"),
             ).add_to(m)
 
-    # 最初にクラスタリングを実行してクラスタの中心を表示
+    # クラスタリングを実行してクラスタの中心を表示
     if not df_gb.empty:
-        centroids = perform_kmeans_clustering(df_gb, num_clusters=5)
+        centroids = perform_kmeans_clustering(df_gb, num_clusters=num_clusters)
+        print(centroids)  # クラスタ中心をデバッグ用に表示
         for idx, centroid in enumerate(centroids):
             folium.Marker(
                 location=[centroid[0], centroid[1]],
@@ -157,10 +193,13 @@
     # 地図を表示
     st_folium(m, width=700, height=500)
 
+
 # カラム3: ランキング表示（捨てられているカテゴリ）
 with col3:
     st.markdown("#### 計画")
     if not df_gb.empty:
-        discarded_categories = df_gb[df_gb["is_discarded"] == 1]["type"].value_counts().head(10)
+        discarded_categories = (
+            df_gb[df_gb["is_discarded"] == 1]["type"].value_counts().head(10)
+        )
         st.write("捨てられているカテゴリのランキング")
         st.bar_chart(discarded_categories)