From 7f290440d15b8191c0a41401bb49889841e1dd45 Mon Sep 17 00:00:00 2001 From: casulit Date: Tue, 29 Oct 2024 14:00:58 +0800 Subject: [PATCH] feat: Add raw Lamudi data processing and enrich Property schema- Implement processing of raw Lamudi data- Add project_name, agent_name, product_owner_name to Property table- Refactor queue listening logic and cleanup functions --- config/deno-kv.ts | 664 ++++++++++++++++------------------- data.json | 150 ++++++++ schema.sql | 63 +++- server.ts | 64 ++-- services/openai-assistant.ts | 2 +- utils/clean-text.ts | 18 + 6 files changed, 569 insertions(+), 392 deletions(-) create mode 100644 data.json create mode 100644 utils/clean-text.ts diff --git a/config/deno-kv.ts b/config/deno-kv.ts index 1a5bee8..ad56cf0 100644 --- a/config/deno-kv.ts +++ b/config/deno-kv.ts @@ -1,15 +1,77 @@ -import type { PoolClient, Transaction } from "postgres"; +import type { Transaction } from "postgres"; import { dbPool } from "./postgres.ts"; import { openaiAssistant } from "../services/openai-assistant.ts"; +export interface RawLamudiData { + id: number; + json_data: Record; + raw_title: string; + property_type_id: number; + offer_type_id: number; + agent_name: string; + product_owner_name: string; + listing_region_id: string; + region: string; + listing_city_id: string; + city: string; + listing_area: string; + listing_area_id: string; + rooms_total: number; + floor_size: number; + lot_size: number; + land_size: number; + building_size: number; + no_of_bedrooms: number; + no_of_bathrooms: number; + no_of_parking_spaces: number; + longitude: number; + latitude: number; + year_built: number; + primary_image_url: string; + indoor_features: Record; + outdoor_features: Record; + property_features: Record; + address: string; + project_name: string; + price: string; + price_formatted: string; + description: string; + full_url: string; + images: { + src: string; + type: "preload" | "secondary"; + alt?: string; + dataSrc?: string; + }[]; +} + +export 
interface Property { + id: number; +} + +export interface Listing { + id: number; + title: string; + url: string; + project_name: string | null; + description: string; + is_scraped: boolean; + address: string | null; + price_formatted: string | null; + price: number; + offer_type_id: number; + property_id: number; + created_at: Date; + updated_at: Date; +} + let db: Deno.Kv | null = null; export interface KvMessage { type: - | "CREATE_LISTING" - | "CREATE_RAW_LAMUDI_DATA" - | "CREATE_AI_GENERATED_DESCRIPTION" - | "PROPERTY_VALUATION"; + | "CREATE_LISTING_FROM_RAW_LAMUDI_DATA" + | "CREATE_RAW_LAMUDI_LISTING_DATA" + | "CREATE_AI_GENERATED_DESCRIPTION"; source: "LAMUDI" | "APP"; // deno-lint-ignore no-explicit-any data: any; @@ -44,383 +106,265 @@ export async function sendMessage(arg: { await kv.enqueue(data, options); } -async function getLocation(client: PoolClient, dataLayer: Location) { - let region = await client.queryObject(` - SELECT id, listing_region_id - FROM Listing_Region - WHERE region = '${dataLayer.region}' - `); - - let city = await client.queryObject(` - SELECT id, listing_city_id - FROM Listing_City - WHERE city = '${dataLayer.city}' - `); - - let area = await client.queryObject(` - SELECT id - FROM Listing_Area - WHERE listing_area_id = '${dataLayer?.listing_area_id || null}' - `); - - if (region.rowCount === 0) { - region = await client.queryObject(` - INSERT INTO Listing_Region (region, listing_region_id) - VALUES ('${dataLayer.region}', '${dataLayer.listing_region_id}') - RETURNING id, listing_region_id - `); - } - - if (city.rowCount === 0) { - const createdRegion = region.rows[0] as { listing_region_id: number }; - - city = await client.queryObject(` - INSERT INTO Listing_City (city, listing_city_id, listing_region_id) - VALUES ('${dataLayer.city}', '${dataLayer.listing_city_id}', '${createdRegion.listing_region_id}') - RETURNING id, listing_city_id - `); - } - - if (area.rowCount === 0) { - area = await client.queryObject(` - INSERT INTO 
Listing_Area (area, listing_area_id) - VALUES ('${dataLayer.area}', '${dataLayer.listing_area_id}') - RETURNING id - `); - } - - return { - region: region.rows[0] as { id: number; listing_region_id: number }, - city: city.rows[0] as { id: number; listing_city_id: number }, - area: area.rows[0] as { id: number }, - }; -} - -function cleanSpecialCharacters(input: string): string { - if (!input) return "No description"; - - // Encode special characters to ensure they are properly interpreted by the SQL engine - const encodedString = encodeURIComponent(input); - - // Remove emojis and other special characters - const cleanedString = encodedString.replace( - /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}]/gu, - "" - ); - - // Remove extra whitespace - const trimmedString = cleanedString.replace(/\s+/g, " ").trim(); - - // Remove any remaining non-printable characters - return trimmedString.replace(/[^\x20-\x7E]/g, ""); -} - export async function listenQueue(kv: Deno.Kv) { await kv.listenQueue(async (msg: KvMessage) => { switch (msg.type) { - case "CREATE_LISTING": - if (msg.source === "LAMUDI" && msg.data.listingUrl && msg.data) { - const handleCondominium = async () => { - let transaction: Transaction | null = null; - const client_1 = await dbPool.connect(); - const client_2 = await dbPool.connect(); - - try { - const attributesLength = Object.keys( - msg.data.dataLayer.attributes - ).length; + case "CREATE_LISTING_FROM_RAW_LAMUDI_DATA": + { + let transaction: Transaction | null = null; + using client = await dbPool.connect(); + try { + transaction = client.createTransaction( + "create_listing_from_raw_lamudi_data", + ); - console.log("attributesLength:", attributesLength); + await transaction.begin(); - transaction = client_1.createTransaction("create-listing"); + const rawProperties = await transaction.queryObject( + ` + SELECT + id, json_data, + json_data->'dataLayer'->>'title' AS raw_title, + CASE + 
WHEN json_data->'dataLayer'->'attributes'->>'attribute_set_name' = 'Condominium' THEN 1 + WHEN json_data->'dataLayer'->'attributes'->>'attribute_set_name' = 'House' THEN 2 + WHEN json_data->'dataLayer'->'attributes'->>'subcategory' = 'Warehouse' THEN 3 + WHEN json_data->'dataLayer'->'attributes'->>'attribute_set_name' = 'Land' THEN 4 + END AS property_type_id, + CASE + WHEN json_data->'dataLayer'->'attributes'->>'offer_type' = 'Buy' THEN 1 + WHEN json_data->'dataLayer'->'attributes'->>'offer_type' = 'Rent' THEN 2 + END AS offer_type_id, + json_data->'dataLayer'->>'agent_name' AS agent_name, + json_data->'dataLayer'->'attributes'->>'product_owner_name' AS product_owner_name, + json_data->'dataLayer'->'attributes'->>'listing_region_id' AS listing_region_id, + json_data->'dataLayer'->'location'->>'region' AS region, + json_data->'dataLayer'->'attributes'->>'listing_city_id' AS listing_city_id, + json_data->'dataLayer'->'location'->>'city' AS city, + json_data->'dataLayer'->'attributes'->>'listing_area' AS listing_area, + json_data->'dataLayer'->'attributes'->>'listing_area_id' AS listing_area_id, + COALESCE((json_data->'dataLayer'->'location'->>'rooms_total')::INTEGER, 0) AS rooms_total, + COALESCE((json_data->'dataLayer'->'attributes'->>'floor_size')::DOUBLE PRECISION, 0) AS floor_size, + COALESCE((json_data->'dataLayer'->'attributes'->>'lot_size')::DOUBLE PRECISION, 0) AS lot_size, + COALESCE((json_data->'dataLayer'->'attributes'->>'land_size')::DOUBLE PRECISION, 0) AS land_size, + COALESCE((json_data->'dataLayer'->'attributes'->>'building_size')::DOUBLE PRECISION, 0) AS building_size, + COALESCE((json_data->'dataLayer'->'attributes'->>'bedrooms')::INTEGER, 0) AS no_of_bedrooms, + COALESCE((json_data->'dataLayer'->'attributes'->>'bathrooms')::INTEGER, 0) AS no_of_bathrooms, + COALESCE((json_data->'dataLayer'->'attributes'->>'car_spaces')::INTEGER, 0) AS no_of_parking_spaces, + (json_data->'dataLayer'->'attributes'->>'location_longitude')::DOUBLE PRECISION AS 
longitude, + (json_data->'dataLayer'->'attributes'->>'location_latitude')::DOUBLE PRECISION AS latitude, + (json_data->'dataLayer'->'attributes'->>'year_built')::INTEGER AS year_built, + json_data->'dataLayer'->'attributes'->>'image_url' AS primary_image_url, + (json_data->'dataLayer'->'attributes'->>'indoor_features')::jsonb AS indoor_features, + (json_data->'dataLayer'->'attributes'->>'outdoor_features')::jsonb AS outdoor_features, + (json_data->'dataLayer'->'attributes'->>'other_features')::jsonb AS property_features, + json_data->'dataLayer'->'attributes'->>'listing_address' AS address, + json_data->'dataLayer'->'attributes'->>'project_name' AS project_name, + json_data->'dataLayer'->'attributes'->>'price' AS price, + json_data->'dataLayer'->'attributes'->>'price_formatted' AS price_formatted, + json_data->'dataLayer'->'description'->>'text' AS description, + CONCAT('https://lamudi.com.ph/', json_data->'dataLayer'->'attributes'->>'urlkey_details') AS full_url, + (json_data->>'images')::jsonb AS images + FROM lamudi_raw_data + WHERE is_process = FALSE + AND COALESCE((json_data->'dataLayer'->'attributes'->>'price')::INTEGER, 0) > 5000 + AND json_data->'dataLayer'->'location'->>'region' IS NOT NULL + AND json_data->'dataLayer'->'location'->>'city' IS NOT NULL + AND json_data->'dataLayer'->'attributes'->>'listing_area' IS NOT NULL + LIMIT 10 + `, + ); - await transaction.begin(); + for (const rawProperty of rawProperties.rows) { + let region = await transaction.queryObject(` + SELECT id, listing_region_id + FROM Listing_Region + WHERE listing_region_id = '${rawProperty.listing_region_id}' + `); - if (!msg.data?.dataLayer) { - throw new Error("DataLayer is missing or undefined"); - } + let city = await transaction.queryObject(` + SELECT id, listing_city_id + FROM Listing_City + WHERE listing_city_id = '${rawProperty.listing_city_id}' + `); - if (!msg.data.dataLayer.attributes || attributesLength < 3) { - throw new Error( - "Attributes are missing, undefined, or have 
fewer than 3 properties" - ); - } + let area = await transaction.queryObject(` + SELECT id + FROM Listing_Area + WHERE listing_area_id = '${rawProperty.listing_area_id}' + `); - if ( - !msg.data.dataLayer.location || - typeof msg.data.dataLayer.location !== "object" - ) { - throw new Error( - "Location is missing, undefined, or not an object" - ); + if (region.rowCount === 0) { + region = await transaction.queryObject({ + args: [rawProperty.region, rawProperty.listing_region_id], + text: `INSERT INTO Listing_Region (region, listing_region_id) + VALUES ($1, $2) + RETURNING id, listing_region_id`, + }); } - let propertyTypeId; - let warehouseTypeId; - const listingUrl = msg.data.listingUrl; - const images = msg.data.images as { src: string }[]; - const isCondominium = - msg.data.dataLayer.attributes.attribute_set_name === - "Condominium"; - const isHouse = - msg.data.dataLayer.attributes.attribute_set_name === "House"; - const isWarehouse = - msg.data.dataLayer.attributes.subcategory === "Warehouse"; - const isLand = - msg.data.dataLayer.attributes.subcategory === "Land"; - - const listingRecord = await transaction.queryObject(` - SELECT id, property_id - FROM Listing - WHERE url = '${listingUrl}' OR title = '${msg.data.dataLayer?.title}' - `); - - if (listingRecord?.rowCount && listingRecord.rowCount > 0) { - const listing = listingRecord.rows[0] as { - id: number; - property_id: number; + if (city.rowCount === 0) { + const createdRegion = region.rows[0] as { + listing_region_id: number; }; - const price = msg.data.dataLayer?.attributes?.price; - const priceFormatted = - msg.data.dataLayer?.attributes?.price_formatted; - - await transaction.queryArray({ - args: [price, priceFormatted, listing.id], - text: ` - UPDATE Listing - SET price = $1, price_formatted = $2 - WHERE id = $3 - `, - }); - - await transaction.queryArray({ + city = await transaction.queryObject({ args: [ - JSON.stringify(msg.data.dataLayer), - JSON.stringify(images.map((image) => image.src)), - 
listing.property_id, + rawProperty.city, + rawProperty.listing_city_id, + createdRegion.listing_region_id, ], - text: ` - UPDATE Property - SET json_data = $1, images = $2 - WHERE id = $3 - `, + text: + `INSERT INTO Listing_City (city, listing_city_id, listing_region_id) + VALUES ($1, $2, $3) + RETURNING id, listing_city_id`, }); - - if (transaction) await transaction.commit(); - client_1.release(); - client_2.release(); - console.log("Transaction successfully committed for update"); - - return; - } - - if (isCondominium) { - propertyTypeId = 1; - } - - if (isHouse) { - propertyTypeId = 2; } - if (isWarehouse) { - const warehouseType = - msg.data.dataLayer.attributes.attribute_set_name; - - const warehouseTypeRecord = await transaction.queryArray({ - args: [warehouseType], - text: ` - SELECT warehouse_type_id - FROM Warehouse_Type - WHERE type_name = $1 - `, + if (area.rowCount === 0 && rawProperty.listing_area_id) { + area = await transaction.queryObject({ + args: [rawProperty.listing_area, rawProperty.listing_area_id], + text: `INSERT INTO Listing_Area (area, listing_area_id) + VALUES ($1, $2) + RETURNING id`, }); - - if (warehouseTypeRecord.rowCount === 1) { - warehouseTypeId = warehouseTypeRecord.rows[0][0] as number; - } else { - const newWarehouseType = await transaction.queryArray({ - args: [warehouseType], - text: ` - INSERT INTO Warehouse_Type (type_name) - VALUES ($1) RETURNING warehouse_type_id - `, - }); - - warehouseTypeId = newWarehouseType.rows[0][0] as number; - } - - propertyTypeId = 3; } + } - if (isLand) { - propertyTypeId = 4; - } + if (rawProperties.rowCount && rawProperties.rowCount > 0) { + for (const rawProperty of rawProperties.rows) { + const images = rawProperty.images.map((image) => image.src); - const agentId = msg.data.dataLayer?.agent_id; - const agentName = msg.data.dataLayer?.agent_name; - const productOwnerId = msg.data.dataLayer?.product_owner; - const productOwnerName = msg.data.dataLayer?.product_owner_name; - const location: 
Location = msg.data.dataLayer.location; - const dataLayerAttributes = msg.data.dataLayer.attributes; - const offerTypeId = - dataLayerAttributes.offer_type === "Rent" ? 2 : 1; - const sellerIsTrusted = dataLayerAttributes?.seller_is_trusted; + const listing = await transaction.queryObject({ + args: [rawProperty.full_url], + text: `SELECT url FROM Listing WHERE url = $1`, + }); - const locationData = await getLocation(client_2, { - ...location, - listing_area_id: dataLayerAttributes?.listing_area_id, - }); + if (listing.rowCount && listing.rowCount > 0) { + console.info("Listing already exists"); - const { region, city, area } = locationData; + await transaction.queryObject({ + args: [rawProperty.id], + text: + `UPDATE lamudi_raw_data SET is_process = TRUE WHERE id = $1`, + }); - let property; + await transaction.queryObject({ + args: [ + rawProperty.price, + rawProperty.price_formatted, + rawProperty.full_url, + ], + text: `UPDATE Listing + SET price = $1, price_formatted = $2 + WHERE url = $3`, + }); - try { - property = await transaction.queryObject({ - args: [ - dataLayerAttributes?.floor_size || 0, - dataLayerAttributes?.land_size || 0, - dataLayerAttributes?.building_size || 0, - dataLayerAttributes?.ceiling_height || 0, - dataLayerAttributes?.bedrooms || 0, - dataLayerAttributes?.bathrooms || 0, - dataLayerAttributes?.car_spaces || 0, - dataLayerAttributes.location_longitude, - dataLayerAttributes.location_latitude, - dataLayerAttributes?.year_built || 0, - dataLayerAttributes?.image_url || null, - JSON.stringify(images.map((image) => image.src)), - JSON.stringify(dataLayerAttributes?.amenities || {}), - JSON.stringify( - dataLayerAttributes?.property_features || {} - ), - JSON.stringify(dataLayerAttributes?.indoor_features || {}), - JSON.stringify(dataLayerAttributes?.outdoor_features || {}), - propertyTypeId, - dataLayerAttributes?.address || null, - region.id, - city.id, - area.id, - JSON.stringify(msg.data.dataLayer), - warehouseTypeId || null, - ], - 
text: ` - INSERT INTO property ( - floor_size, - lot_size, - building_size, - ceiling_height, - no_of_bedrooms, - no_of_bathrooms, - no_of_parking_spaces, - longitude, - latitude, - year_built, - primary_image_url, - images, - amenities, - property_features, - indoor_features, - outdoor_features, - property_type_id, - address, - listing_region_id, - listing_city_id, - listing_area_id, - json_data, - warehouse_type_id - ) VALUES ( - $1, - $2, - $3, - $4, - $5, - $6, - $7, - $8, - $9, - $10, - $11, - $12, - $13, - $14, - $15, - $16, - $17, - $18, - $19, - $20, - $21, - $22, - $23 - ) RETURNING id - `, - }); - } catch (error) { - console.error("Error inserting property:", error); - throw error; - } + await transaction.queryObject({ + args: [ + JSON.stringify(images), + rawProperty.agent_name, + rawProperty.product_owner_name, + rawProperty.project_name, + rawProperty.full_url, + ], + text: `UPDATE Property p + SET images = $1, + agent_name = $2, + product_owner_name = $3, + project_name = $4 + FROM Listing l + WHERE l.property_id = p.id AND l.url = $5`, + }); - const newProperty = property.rows[0] as { id: number }; + continue; + } - const address = `${ - dataLayerAttributes?.listing_area - ? `${dataLayerAttributes.listing_area}, ` - : "" - }${dataLayerAttributes.listing_city}`; + let property; - try { - await transaction.queryObject({ - args: [ - msg.data.dataLayer?.title, - `https://www.lamudi.com.ph/${dataLayerAttributes?.urlkey_details}`, - dataLayerAttributes?.project_name || null, - cleanSpecialCharacters( - msg.data.dataLayer?.description?.text - ), - true, - address, - dataLayerAttributes?.price_formatted - ? 
`${dataLayerAttributes?.price_formatted}` - : null, - dataLayerAttributes?.price || 0, - offerTypeId, - newProperty.id, - ], - text: `INSERT INTO Listing (title, url, project_name, description, is_scraped, address, price_formatted, price, offer_type_id, property_id) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) RETURNING id`, - }); - } catch (error) { - console.error("Error inserting listing:", error); - throw error; - } + try { + property = await transaction.queryObject({ + args: [ + rawProperty.floor_size, + rawProperty.lot_size, + rawProperty.building_size, + rawProperty.no_of_bedrooms, + rawProperty.no_of_bathrooms, + rawProperty.no_of_parking_spaces, + rawProperty.longitude, + rawProperty.latitude, + rawProperty.year_built, + rawProperty.primary_image_url, + JSON.stringify(images), + JSON.stringify(rawProperty.property_features), + JSON.stringify(rawProperty.indoor_features), + JSON.stringify(rawProperty.outdoor_features), + rawProperty.property_type_id, + rawProperty.address, + rawProperty.listing_region_id, + rawProperty.listing_city_id, + rawProperty.listing_area_id, + rawProperty.project_name, + rawProperty.agent_name, + rawProperty.product_owner_name, + ], + text: `INSERT INTO Property + ( + floor_size, lot_size, building_size, no_of_bedrooms, + no_of_bathrooms, no_of_parking_spaces, longitude, + latitude, year_built, primary_image_url, images, + property_features, indoor_features, outdoor_features, + property_type_id, address, listing_region_id, listing_city_id, + listing_area_id, project_name, agent_name, product_owner_name + ) + VALUES ( + $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, + $14, $15, $16, $17, $18, $19, $20, $21, $22 + ) RETURNING id`, + }); + } catch (error) { + throw error; + } - await transaction.commit(); - console.log("Transaction successfully committed for create"); - // deno-lint-ignore no-explicit-any - } catch (error: any) { - if (transaction) { - console.log("Transaction rollback"); - await transaction.rollback(); 
+ if (property) { + try { + await transaction.queryObject({ + args: [ + rawProperty.raw_title, + rawProperty.full_url, + rawProperty.project_name, + rawProperty.description, + true, // is_scraped + rawProperty.address, + rawProperty.price_formatted, + rawProperty.price, + rawProperty.offer_type_id, + property.rows[0].id, + ], + text: ` + INSERT INTO Listing ( + title, url, project_name, description, is_scraped, + address, price_formatted, price, offer_type_id, property_id + ) VALUES ( + $1, $2, $3, $4, $5, $6, $7, $8, $9, $10 + ) + `, + }); + } catch (error) { + throw error; + } + } } - throw error; - } finally { - console.log("Connection released"); - client_1.release(); - client_2.release(); } - }; - try { - await handleCondominium(); - // deno-lint-ignore no-explicit-any - } catch (error: any) { - console.error(error?.message || error); + await transaction.commit(); + } catch (error) { + if (transaction) await transaction.rollback(); + console.error(error); } } break; - case "CREATE_RAW_LAMUDI_DATA": + case "CREATE_RAW_LAMUDI_LISTING_DATA": { let transaction: Transaction | null = null; const client_1 = await dbPool.connect(); @@ -433,7 +377,8 @@ export async function listenQueue(kv: Deno.Kv) { msg.data.listingUrl, JSON.stringify(msg.data.images), ], - text: `INSERT INTO Lamudi_raw_data (json_data, listingUrl, images) VALUES ($1, $2, $3)`, + text: + `INSERT INTO Lamudi_raw_data (json_data, listingUrl, images) VALUES ($1, $2, $3)`, }); await transaction.commit(); console.log("Transaction successfully committed for create"); @@ -448,11 +393,12 @@ export async function listenQueue(kv: Deno.Kv) { break; case "CREATE_AI_GENERATED_DESCRIPTION": { - const client_1 = await dbPool.connect(); - + using client = await dbPool.connect(); try { - const property = await client_1.queryObject( - `SELECT * FROM Property WHERE ai_generated_description IS NULL AND property_type_id IN (1, 3) ORDER BY created_at DESC LIMIT 10` + const property = await client.queryObject( + `SELECT * 
FROM Property + WHERE ai_generated_description IS NULL AND property_type_id IN (1, 3) + ORDER BY created_at DESC LIMIT 10`, ); if (property.rowCount && property.rowCount > 0) { @@ -463,28 +409,29 @@ export async function listenQueue(kv: Deno.Kv) { }; const aiGeneratedDescription = await openaiAssistant( - JSON.stringify(row) + JSON.stringify(row), ); try { JSON.parse( aiGeneratedDescription.includes("```json") ? aiGeneratedDescription - .replace("```json", "") - .replace("```", "") - : aiGeneratedDescription + .replace("```json", "") + .replace("```", "") + : aiGeneratedDescription, ); } catch { throw Error("Invalid AI description format"); } if (aiGeneratedDescription) { - await client_1.queryObject({ + await client.queryObject({ args: [ JSON.stringify(aiGeneratedDescription), propertyData.id, ], - text: `UPDATE Property SET ai_generated_description = $1 WHERE id = $2`, + text: + `UPDATE Property SET ai_generated_description = $1 WHERE id = $2`, }); } }; @@ -502,9 +449,6 @@ export async function listenQueue(kv: Deno.Kv) { console.log("Successfully processed ai generated description"); } catch (error) { console.error(error); - } finally { - client_1.release(); - console.log("Connection released"); } } break; diff --git a/data.json b/data.json new file mode 100644 index 0000000..a322f9e --- /dev/null +++ b/data.json @@ -0,0 +1,150 @@ +{ + "images": [ + { + "src": "https://static-ph.lamudi.com/static/media/bm9uZS9ub25l/2x2x5x880x450/fd650bcd833015.webp", + "type": "preload" + }, + { + "alt": "50000 ₱ Condo", + "src": "https://static-ph.lamudi.com/static/media/bm9uZS9ub25l/2x2x5x380x244/98c3b895975fd5.webp", + "type": "secondary", + "dataSrc": "https://media-ph-live.lamudi.com/static/media/bm9uZS9ub25l/100000x100000/98c3b895975fd5.jpg?country=ph&environment=live&domain=lamudi.com" + }, + { + "alt": " sqm Land Size Condo For Rent", + "src": "https://static-ph.lamudi.com/static/media/bm9uZS9ub25l/2x2x5x380x244/b254e252fb7026.webp", + "type": "secondary", + "dataSrc": 
"https://media-ph-live.lamudi.com/static/media/bm9uZS9ub25l/100000x100000/b254e252fb7026.jpg?country=ph&environment=live&domain=lamudi.com" + }, + { + "alt": "87 sqm Living Size Condo For Rent in Quezon City", + "src": "https://static-ph.lamudi.com/static/media/bm9uZS9ub25l/2x2x5x380x244/639bee9c56f1fd.webp", + "type": "secondary", + "dataSrc": "https://media-ph-live.lamudi.com/static/media/bm9uZS9ub25l/100000x100000/639bee9c56f1fd.jpg?country=ph&environment=live&domain=lamudi.com" + }, + { + "alt": "Fully furnished Condo For Rent", + "src": "https://static-ph.lamudi.com/static/media/bm9uZS9ub25l/2x2x5x380x244/68db47e41d526a.webp", + "type": "secondary", + "dataSrc": "https://media-ph-live.lamudi.com/static/media/bm9uZS9ub25l/100000x100000/68db47e41d526a.jpg?country=ph&environment=live&domain=lamudi.com" + } + ], + "dataLayer": { + "sku": "CD6650433384575PH", + "title": "Perfect and Spacious 3-BR Family Unit near Greenhills area and New Manila area", + "images": 12, + "country": "ph", + "agent_id": 842901, + "language": "en", + "location": { + "area": "Valencia", + "city": "Quezon City", + "region": "Metro Manila" + }, + "page_type": "product", + "agent_name": "", + "attributes": { + "sku": "CD6650433384575PH", + "name": "Perfect and Spacious 3-BR Family Unit near Greenhills area and New Manila area", + "price": 50000, + "status": "active", + "approved": 1, + "bedrooms": 3, + "is_agent": 0, + "is_viber": 0, + "bathrooms": 3, + "furnished": "", + "image_url": "https://static-ph.lamudi.com/static/media/bm9uZS9ub25l/2x2x5x880x396/fd650bcd833015.jpg", + "categories": [33, 39], + "offer_type": "Rent", + "project_id": "2766", + "is_facebook": 0, + "is_whatsapp": 1, + "rooms_total": 4, + "show_mobile": 1, + "subcategory": "Other", + "furnished_id": 2, + "listing_area": "Valencia", + "listing_city": "Quezon City", + "listing_type": "Classifieds", + "project_name": "Gilmore Heights", + "top_position": 1, + "alternate_sku": "39983295", + "building_size": 87, + 
"currency_code": "PHP", + "listing_start": "2024-05-24 15:35:15", + "offer_type_id": 1, + "product_owner": 842901, + "root_category": 33, + "listing_region": "Metro Manila", + "other_features": [], + "project_urlkey": "gilmore-heights", + "subcategory_id": 39, + "urlkey_details": "perfect-and-spacious-3-br-family-unit-near-greenhi-171653611579.html", + "indoor_features": [ + "Alarm System", + "Built-in wardrobes", + "Drying Area", + "Elevators", + "Ensuite", + "Fire exits", + "Fitness center", + "Function Room", + "Gym", + "Laundry Area", + "Lobby", + "Lounge", + "Maids Room", + "Meeting rooms", + "Multi-Purpose Hall", + "Powder room", + "Reception Area", + "Smoke detector", + "Fire Alarm" + ], + "listing_address": "Gilmore Heights, Granada Street corner Castilla Street, Quezon City", + "listing_area_id": "125096", + "listing_city_id": "1988", + "price_formatted": "₱50,000", + "price_not_shown": false, + "attribute_set_id": 1, + "outdoor_features": [ + "Function area", + "Open space", + "24-hour security", + "Drying area", + "Spa", + "Shops", + "Fully fenced", + "Parking lot", + "Playground", + "Secure parking", + "Sports facilities", + "Swimming pool" + ], + "show_officephone": 0, + "listing_region_id": "48", + "location_latitude": "14.6109081", + "seller_is_trusted": 1, + "attribute_set_name": "Condominium", + "location_longitude": "121.0361368", + "show_listing_address": 1, + "agent_ratings_enabled": 0, + "product_owner_url_key": "-agn-893922", + "status_supplier_config": "active" + }, + "description": { + "text": "Gilmore Heights Condominium a perfect and spacious 3-Bedroom family unit near Greenhills area and New Manila area for Rent + semi-furnished + three (3) well-ventilated bedrooms + three (3) bathrooms + spacious and bright living room + dining area + kitchen with wall and base cabinets for ample storage + maids’ room + laundry and drying area KEY FEATURES: + condominium has SWIMMING POOL, FITNESS CENTER, PLAYGROUND, HUGE CONFERENCE ROOM, SOCIAL HALL, 
24-HOUR SECURITY SERVICE NEARBY: + top schools like XAVIER SCHOOL, ICA, JUBILEE, ATENEO DE MANILA UNIVERSITY, UP DILIMAN, MIRIAM COLLEGE, ST. PAUL UNIVERSITY, UERM, ST. JUDE, DE LA SALLE GREENHILLS + shopping centers like ROBINSONS MAGNOLIA, GREENHILLS SHOPPING CENTER, GATEWAY MALL CUBAO, ALI MALL, FARMERS PLAZA, IL TERRAZZO (TOMAS MORATO) + 40-minute drive to MAKATI CENTRAL BUSINESS DISTRICT + 30-minute drive to BGC + medical facilities like ST. LUKES MEDICAL CENTER QUEZON CITY, CARDINAL SANTOS MEDICAL CENTER, THE MEDICAL CITY, UERM MEDICAL CENTER, CAPITOL MEDICAL CENTER, OUR LADY OF LOURDES HOSPITAL", + "words": 186, + "characters": 1373 + }, + "device_type": "desktop", + "login_state": false, + "device_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/127.0.0.0 Safari/537.36", + "number_of_product_features": 31, + "desktop_leads_optional_phone": 0, + "number_of_product_attributes": 55, + "whatsapp_leads_optional_phone": 0 + }, + "listingUrl": "https://www.lamudi.com.ph/projects/gilmore-heights/perfect-and-spacious-3-br-family-unit-near-greenhi-171653611579/" +} diff --git a/schema.sql b/schema.sql index 967a86c..511ed87 100644 --- a/schema.sql +++ b/schema.sql @@ -82,6 +82,9 @@ CREATE TABLE Property ( created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, geog GEOGRAPHY(Point, 4326), + project_name VARCHAR(100), + agent_name VARCHAR(100), + product_owner_name VARCHAR(100), CONSTRAINT check_floor_size CHECK (floor_size >= 0), CONSTRAINT check_lot_size CHECK (lot_size >= 0), CONSTRAINT check_building_size CHECK (building_size >= 0), @@ -103,6 +106,7 @@ CREATE TABLE Listing ( created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); + CREATE TABLE Price_Change_Log ( id SERIAL PRIMARY KEY, listing_id INTEGER NOT NULL, @@ -113,6 +117,7 @@ CREATE TABLE Price_Change_Log ( ); CREATE TABLE Lamudi_raw_data ( + id SERIAL PRIMARY KEY, 
json_data JSONB, listingUrl TEXT, images JSONB, @@ -249,4 +254,60 @@ FROM WHERE ST_DWithin(p.geog, search_point.geog, search_point.max_distance_km * 1000) ORDER BY - distance_km; \ No newline at end of file + distance_km; + + +SELECT + id, json_data, + json_data->'dataLayer'->>'title' AS raw_title, + CASE + WHEN json_data->'dataLayer'->'attributes'->>'attribute_set_name' = 'Condominium' THEN 1 + WHEN json_data->'dataLayer'->'attributes'->>'attribute_set_name' = 'House' THEN 2 + WHEN json_data->'dataLayer'->'attributes'->>'subcategory' = 'Warehouse' THEN 3 + WHEN json_data->'dataLayer'->'attributes'->>'attribute_set_name' = 'Land' THEN 4 + END AS property_type_id, + CASE + WHEN json_data->'dataLayer'->'attributes'->>'offer_type' = 'Buy' THEN 1 + WHEN json_data->'dataLayer'->'attributes'->>'offer_type' = 'Rent' THEN 2 + END AS offer_type_id, + json_data->'dataLayer'->>'agent_name' AS agent_name, + json_data->'dataLayer'->'attributes'->>'product_owner_name' AS product_owner_name, + json_data->'dataLayer'->'attributes'->>'listing_region_id' AS listing_region_id, + json_data->'dataLayer'->'location'->>'region' AS region, + json_data->'dataLayer'->'attributes'->>'listing_city_id' AS listing_city_id, + json_data->'dataLayer'->'location'->>'city' AS city, + json_data->'dataLayer'->'attributes'->>'listing_area' AS listing_area, + json_data->'dataLayer'->'attributes'->>'listing_area_id' AS listing_area_id, + COALESCE((json_data->'dataLayer'->'location'->>'rooms_total')::INTEGER, 0) AS rooms_total, + COALESCE((json_data->'dataLayer'->'attributes'->>'floor_size')::DOUBLE PRECISION, 0) AS floor_size, + COALESCE((json_data->'dataLayer'->'attributes'->>'lot_size')::DOUBLE PRECISION, 0) AS lot_size, + COALESCE((json_data->'dataLayer'->'attributes'->>'land_size')::DOUBLE PRECISION, 0) AS land_size, + COALESCE((json_data->'dataLayer'->'attributes'->>'building_size')::DOUBLE PRECISION, 0) AS building_size, + COALESCE((json_data->'dataLayer'->'attributes'->>'bedrooms')::INTEGER, 0) AS 
no_of_bedrooms, + COALESCE((json_data->'dataLayer'->'attributes'->>'bathrooms')::INTEGER, 0) AS no_of_bathrooms, + COALESCE((json_data->'dataLayer'->'attributes'->>'car_spaces')::INTEGER, 0) AS no_of_parking_spaces, + (json_data->'dataLayer'->'attributes'->>'location_longitude')::DOUBLE PRECISION AS longitude, + (json_data->'dataLayer'->'attributes'->>'location_latitude')::DOUBLE PRECISION AS latitude, + (json_data->'dataLayer'->'attributes'->>'year_built')::INTEGER AS year_built, + json_data->'dataLayer'->'attributes'->>'image_url' AS primary_image_url, + (json_data->'dataLayer'->'attributes'->>'indoor_features')::jsonb AS indoor_features, + (json_data->'dataLayer'->'attributes'->>'outdoor_features')::jsonb AS outdoor_features, + (json_data->'dataLayer'->'attributes'->>'other_features')::jsonb AS property_features, + json_data->'dataLayer'->'attributes'->>'listing_address' AS address, + json_data->'dataLayer'->'attributes'->>'project_name' AS project_name, + json_data->'dataLayer'->'attributes'->>'price' AS price, + json_data->'dataLayer'->'attributes'->>'price_formatted' AS price_formatted, + json_data->'dataLayer'->'description'->>'text' AS description, + CONCAT('https://lamudi.com.ph/', json_data->'dataLayer'->'attributes'->>'urlkey_details') AS full_url, + json_data->>'images' AS images, + array( + SELECT jsonb_array_elements(images) ->> 'src' + FROM lamudi_raw_data + WHERE id = lamudi_raw_data.id + ) AS image_src_urls +FROM lamudi_raw_data +WHERE is_process = FALSE + AND json_data->'dataLayer'->'location'->>'region' IS NOT NULL + AND json_data->'dataLayer'->'location'->>'city' IS NOT NULL + AND json_data->'dataLayer'->'attributes'->>'listing_area' IS NOT NULL +LIMIT 10 diff --git a/server.ts b/server.ts index 451d7dc..d32cc19 100644 --- a/server.ts +++ b/server.ts @@ -4,17 +4,11 @@ import { cors } from "npm:hono/cors"; import { dbPool } from "./config/postgres.ts"; import { getKvInstance, listenQueue, sendMessage } from "./config/deno-kv.ts"; -import { 
openaiAssistant } from "./services/openai-assistant.ts"; const app = new Hono(); const kv = await getKvInstance(); -app.use( - "*", - cors({ - origin: "*", - }), -); +app.use("*", cors({ origin: "*" })); app.get("/api/properties", async (c: Context) => { using client = await dbPool.connect(); @@ -349,10 +343,10 @@ app.get("/api/properties/valuation", async (c: Context) => { } const queryParams: (number)[] = [propertyTypeId, sizeInSqm]; - let cityClause = ''; - let propertyFeaturesClause = ''; + let cityClause = ""; + let propertyFeaturesClause = ""; let paramCounter = 3; - + if (data.city_id) { const cityId = parseInt(data.city_id); if (isNaN(cityId) || cityId < 1) { @@ -391,7 +385,8 @@ app.get("/api/properties/valuation", async (c: Context) => { return c.json({ error: "Invalid number of parking spaces" }, 400); } queryParams.push(parkingSpaces); - propertyFeaturesClause += `AND p.no_of_parking_spaces = $${paramCounter} `; + propertyFeaturesClause += + `AND p.no_of_parking_spaces = $${paramCounter} `; paramCounter++; } } @@ -427,30 +422,40 @@ app.get("/api/properties/valuation", async (c: Context) => { COUNT(*) as total_comparable_properties FROM PropertyStats l GROUP BY l.offer_type_id - ` + `, }); if (!properties.rows.length) { - return c.json({ - error: "Not enough data to generate valuation for the specified criteria" + return c.json({ + error: "Not enough data to generate valuation for the specified criteria", }, 404); } - const valuationData = properties.rows.reduce((acc, row) => { - const type = row.offer_type_id === 1 ? 
'buy' : 'rent'; - const formattedPrice = new Intl.NumberFormat('en-PH', { - style: 'currency', - currency: 'PHP', - minimumFractionDigits: 2 - }).format(row.average_price); - - acc[type] = { - average_price: row.average_price.toString(), - formatted_price: formattedPrice, - total_comparable_properties: row.total_comparable_properties.toString() - }; - return acc; - }, {} as Record); + const valuationData = properties.rows.reduce( + (acc, row) => { + const type = row.offer_type_id === 1 ? "buy" : "rent"; + const formattedPrice = new Intl.NumberFormat("en-PH", { + style: "currency", + currency: "PHP", + minimumFractionDigits: 2, + }).format(row.average_price); + + acc[type] = { + average_price: row.average_price.toString(), + formatted_price: formattedPrice, + total_comparable_properties: row.total_comparable_properties.toString(), + }; + return acc; + }, + {} as Record< + string, + { + average_price: string; + formatted_price: string; + total_comparable_properties: string; + } + >, + ); return c.json({ data: valuationData }); }); @@ -485,7 +490,6 @@ app.get("/api/properties/cities", async (c: Context) => { app.get("/api/properties/:id", async (c: Context) => { using client = await dbPool.connect(); const id = c.req.param("id"); - const query = c.req.query(); if (!id) { return c.json({ error: "Property ID is required" }, 400); diff --git a/services/openai-assistant.ts b/services/openai-assistant.ts index 4e4e216..cc1f3e4 100644 --- a/services/openai-assistant.ts +++ b/services/openai-assistant.ts @@ -67,7 +67,7 @@ export const openaiAssistant = async (question: string) => { const lastMessageForRun = messages.data .filter( - (message) => message.run_id === run.id && message.role === "assistant" + (message) => message.run_id === run.id && message.role === "assistant", ) .pop(); diff --git a/utils/clean-text.ts b/utils/clean-text.ts new file mode 100644 index 0000000..e2faba4 --- /dev/null +++ b/utils/clean-text.ts @@ -0,0 +1,18 @@ +export function cleanText(input: 
/**
 * Normalises free-form listing text and returns it percent-encoded.
 *
 * Cleaning is done on the raw text and encoding happens last. The output of
 * `encodeURIComponent` contains only ASCII unreserved characters and `%XX`
 * escapes, so running the clean-up patterns on already-encoded text can never
 * match anything (emojis, whitespace runs and control characters would all
 * survive as escapes).
 *
 * Steps, in order:
 * 1. strip emoji / pictograph code points,
 * 2. strip control characters, invisible format characters and unpaired
 *    surrogates (the latter would make `encodeURIComponent` throw `URIError`),
 * 3. collapse every whitespace run to a single space and trim the ends,
 * 4. percent-encode the result with `encodeURIComponent`.
 *
 * Printable non-ASCII text (for example "ñ" or "₱") is kept and survives as
 * UTF-8 percent escapes, so `decodeURIComponent` restores it.
 *
 * NOTE(review): `encodeURIComponent` leaves `- _ . ! ~ * ' ( )` unescaped, so
 * the returned value is NOT safe to splice into SQL text. Pass it as a bound
 * query parameter instead.
 *
 * @param input - Raw text, e.g. a scraped listing description. May be empty.
 * @returns The cleaned, percent-encoded text, or the literal placeholder
 *   "No description" when `input` is empty or nothing printable remains.
 */
export function cleanText(input: string): string {
  const placeholder = "No description";
  if (!input) return placeholder;

  // Emoticons, pictographs, transport symbols, regional indicators,
  // miscellaneous symbols and dingbats.
  const withoutEmoji = input.replace(
    /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}]/gu,
    "",
  );

  // C0/C1 controls except TAB..CR (those are whitespace and are collapsed
  // below), format characters such as zero-width joiners, and lone surrogates.
  // With the `u` flag a surrogate range only matches UNPAIRED surrogates,
  // so valid astral characters are left intact.
  const printable = withoutEmoji.replace(
    /[\u0000-\u0008\u000E-\u001F\u007F-\u009F\uD800-\uDFFF\p{Cf}]/gu,
    "",
  );

  // Collapse after stripping so a removed emoji does not leave a double space.
  const collapsed = printable.replace(/\s+/g, " ").trim();
  if (!collapsed) return placeholder;

  return encodeURIComponent(collapsed);
}