Skip to content

Commit

Permalink
initial release
Browse files Browse the repository at this point in the history
  • Loading branch information
tetreum committed Mar 9, 2016
1 parent a792724 commit 73a3df6
Show file tree
Hide file tree
Showing 15 changed files with 993 additions and 2 deletions.
80 changes: 80 additions & 0 deletions App/Providers/Fotocasa.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
<?php
namespace Xupopter\Providers;

use Xupopter\System\Provider;
use Xupopter\System\IProvider;

/**
 * Crawler for fotocasa.es listings.
 *
 * Relies on getContent(), sendToDB() and strToNumber(), which are not defined
 * in this class (presumably inherited from Provider).
 */
class Fotocasa extends Provider implements IProvider
{
    private $domain = "http://www.fotocasa.es";

    /**
     * Fetches a listing page, then fetches, parses and stores every ad it links to.
     *
     * @param string $path path of the listing page, appended to the provider domain
     */
    public function crawl ($path)
    {
        $listing = $this->getContent($this->domain . $path);

        foreach ($listing->find('#search-listing tr.expanded') as $row)
        {
            $url = $row->attr("data-url");

            // a row without a detail url cannot be fetched
            if (empty($url)) {
                continue;
            }

            $item = $this->parseItem($this->getContent($url));

            if ($item) {
                $this->sendToDB($item);
            }
        }
    }

    /**
     * Converts provider output to db's input format
     *
     * @param QueryPath $html parsed detail page of a single ad
     *
     * @return mixed (array/boolean) the ad data, or false when the ad has
     *               no surface or no description
     */
    public function parseItem ($html)
    {
        $images = [];

        /*
            transform http://a.ftcs.es/inmesp/anuncio/2015/04/03/135151707/253141017.jpg/w_0/c_690x518/p_1/
            to http://a.ftcs.es/inmesp/anuncio/2015/04/03/135151707/253141017.jpg
        */
        foreach ($html->find('#containerSlider img') as $img)
        {
            $src = $img->attr("data-src");

            if (empty($src)) {
                $src = $img->attr("src");
            }

            // no usable source at all: do not store a bogus ".jpg" entry
            if (empty($src)) {
                continue;
            }

            // drop the resize suffix that follows the first ".jpg"; sources
            // without ".jpg" are kept untouched instead of getting it appended
            $cut = strpos($src, ".jpg");
            $images[] = ($cut === false) ? $src : substr($src, 0, $cut + 4);
        }

        $data = [
            'title' => trim($html->find('.property-title')->text()),
            'description' => trim($html->find('#ctl00_ddDescription .detail-section-content')->text()),
            'images' => $images,
            'location' => trim($html->find('.section.section--noBorder .detail-section-content')->text()),
            'price' => $this->strToNumber($html->find('#priceContainer')->text()),
            'meters' => $this->strToNumber($html->find('#litSurface b')->text()),
            'floor' => (int)$html->find('#litFloor')->text(),
            'url' => $html->find('link[rel="canonical"]')->attr("href")
        ];

        // "Ascensor" (elevator) is the only extra that is currently mapped
        foreach ($html->find('.detail-extras li') as $li) {
            if (trim($li->text()) === "Ascensor") {
                $data["elevator"] = true;
            }
        }

        // ads without surface or description are discarded
        if ($data["meters"] == 0 || empty($data["description"])) {
            return false;
        }

        return $data;
    }
}
127 changes: 127 additions & 0 deletions App/Providers/Habitaclia.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
<?php
namespace Xupopter\Providers;

use Xupopter\System\Provider;
use Xupopter\System\IProvider;

/**
 * Crawler for habitaclia.com listings.
 *
 * Relies on getContent(), sendToDB() and strToNumber(), which are not defined
 * in this class (presumably inherited from Provider).
 */
class Habitaclia extends Provider implements IProvider
{
    private $domain = "http://www.habitaclia.com";

    // NOTE(review): minimum number of results, not read anywhere in this
    // class; presumably consumed by the Provider base class - confirm.
    public $minResults = 500;

    // labels of the ".caracteristicas" list that map to boolean fields
    private $featureFlags = [
        "Aire acondicionado" => "airConditioner",
        "Calefacción" => "heating",
        "Parking" => "parking",
        "Ascensor" => "elevator",
        "Amueblado" => "furnished"
    ];

    /**
     * Fetches a listing page, then fetches, parses and stores every ad it links to.
     *
     * @param string $path path of the listing page, appended to the provider domain
     */
    public function crawl ($path)
    {
        $listing = $this->getContent($this->domain . $path);

        foreach ($listing->find('#listaAds li a[itemprop=name]') as $link)
        {
            $url = $link->attr("href");

            // a link without href cannot be fetched
            if (empty($url)) {
                continue;
            }

            $item = $this->parseItem($this->getContent($url));

            if ($item) {
                $this->sendToDB($item);
            }
        }
    }

    /**
     * Maps the text of a feature value to a boolean.
     *
     * NOTE(review): only an empty value counts as true; presumably the site
     * renders a present feature without any text - confirm.
     *
     * @param string $str trimmed feature value
     *
     * @return boolean
     */
    private function stringToBool ($str)
    {
        return $str === "";
    }

    /**
     * Converts provider output to db's input format
     *
     * @param QueryPath $html parsed detail page of a single ad
     *
     * @return mixed (array/boolean) the ad data, or false when the ad has
     *               no images or no fixed price
     */
    public function parseItem ($html)
    {
        $images = [];

        // doesnt have images or price
        if (!empty($html->find('.cajon-pedir-foto')->text()) || !empty($html->find('.pvpdesde')->text())) {
            return false;
        }

        // collapse every run of whitespace in the address into a single space
        $location = trim(preg_replace('/(\v|\s)+/', ' ', $html->find('.dir_ex.sprite')->text()));
        $description = trim($html->find('[itemprop="description"] p')->text());

        $data = [
            // trimmed like the other text fields
            'title' => trim($html->find('.h1ficha')->text()),
            'location' => $location,
            'description' => $description,
            'url' => $html->find('link[rel="canonical"]')->attr("href"),
            "price" => $this->strToNumber($html->find('[itemprop="price"]')->text())
        ];

        // the update date is the "(dd/mm/yyyy)"-like part of the text
        $lastUpdate = trim($html->find('.actualizado.radius')->text());

        if (preg_match("/\(([0-9\/]+)\)/", $lastUpdate, $matches)) {
            $data["lastUpdate"] = $matches[1];
        }

        foreach ($html->find('#inificha .bodis ul li') as $li)
        {
            $text = $li->text();

            if (strpos($text, " m2") !== false) {
                $data["meters"] = $this->strToNumber($li->find("span")->text());
            } else if (strpos($text, "habitaci") !== false) {
                // "habitaci" matches both "habitaciones" and the singular "habitación"
                $data["rooms"] = (int)$text;
            }
        }

        foreach ($html->find('.caracteristicas li') as $li)
        {
            $text = $li->text();

            if (strpos($text, ":") === false) {
                continue;
            }

            // split on the first ":" only, so the value may contain colons
            $info = explode(":", $text, 2);
            $label = trim($info[0]);

            if ($label === "Número de planta") {
                $data["floor"] = (int)$info[1];
            } else if (isset($this->featureFlags[$label])) {
                $data[$this->featureFlags[$label]] = $this->stringToBool(trim($info[1]));
            }
        }

        foreach ($html->find(".ficha_foto img") as $img)
        {
            $src = $img->attr("src");

            // skip images without a source instead of storing an empty url
            if (empty($src)) {
                continue;
            }

            // request the "XL" variant of the picture instead of the "G" one
            $images[] = str_replace("G.jpg", "XL.jpg", $src);
        }

        if (sizeof($images) > 0) {
            $data["images"] = $images;
        }

        return $data;
    }
}
84 changes: 84 additions & 0 deletions App/Providers/Idealista.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
<?php
namespace Xupopter\Providers;

use Xupopter\System\Provider;
use Xupopter\System\IProvider;

/**
 * Crawler for idealista.com listings.
 *
 * Relies on getContent(), sendToDB(), strToNumber() and parseHouseInfo(),
 * which are not defined in this class (presumably inherited from Provider).
 */
class Idealista extends Provider implements IProvider
{
    private $domain = "http://www.idealista.com";

    /**
     * Fetches a listing page, then fetches, parses and stores every ad it links to.
     *
     * @param string $path path of the listing page, appended to the provider domain
     */
    public function crawl ($path)
    {
        $listing = $this->getContent($this->domain . $path);

        foreach ($listing->find('.item-link') as $link)
        {
            $href = $link->attr("href");

            // without href we would fetch the bare domain instead of an ad
            if (empty($href)) {
                continue;
            }

            $item = $this->parseItem($this->getContent($this->domain . $href));

            if ($item) {
                $this->sendToDB($item);
            }
        }
    }

    /**
     * Converts provider output to db's input format
     *
     * @param QueryPath $html parsed detail page of a single ad
     *
     * @return mixed (array/boolean) the ad data, or false when the image
     *               key, the surface or the description is missing
     */
    public function parseItem ($html)
    {
        $images = [];

        // get ch var from og image (required to display the images)
        $ogImage = $html->find('[name="og:image"]')->attr("content");

        if (empty($ogImage)) {
            return false;
        }

        // parse_url yields null/false when the url has no (valid) query string
        $queryString = parse_url($ogImage, PHP_URL_QUERY);

        if (empty($queryString)) {
            return false;
        }

        parse_str($queryString, $query);

        // without "ch" every generated image url would be unusable
        if (!isset($query["ch"]) || $query["ch"] === "") {
            return false;
        }

        $imageCh = $query["ch"];

        /*
            transform http://img3.idealista.com/thumbs,W,H,wi,+tSLyO%2BcnvWFQ1vfQ1%2FQRH6EBc9TEzAKu5PmhgV%2
            to http://img3.idealista.com/thumbs?wi=1500&he=0&en=%2BtSLyO%2BcnvWFQ1vfQ1%2FQRH6EBc9TEzAKu5PmhgV%2&ch=2106166706
        */
        foreach ($html->find('#main-multimedia img') as $img) {
            $service = $img->attr("data-service");

            // images without the service template cannot be rebuilt
            if (empty($service)) {
                continue;
            }

            $image = str_replace("http://img3.idealista.com/thumbs,W,H,wi,+", "", $service);

            $images[] = "http://img3.idealista.com/thumbs?wi=1500&he=0&en=%2B" . urlencode($image) . "&ch=" . $imageCh;
        }

        // the location is the title without its "flat for sale/rent in" prefix
        $title = trim($html->find('h1.txt-bold span')->text());
        $location = str_replace(["Piso en venta en ", "Piso en alquiler en "], "", $title);

        $data = [
            'title' => $title,
            'description' => trim($html->find('.adCommentsLanguage.expandable')->text()),
            'images' => $images,
            'location' => $location,
            'price' => $this->strToNumber($html->find('#main-info .txt-big.txt-bold')->eq(0)->text()),
            'url' => $html->find('#share-link')->attr("href")
        ];

        // NOTE(review): parseHouseInfo() is expected to add fields such as
        // "meters" to $data (presumably taken by reference) - confirm.
        foreach ($html->find('#fixed-toolbar .info-data > span') as $item)
        {
            $this->parseHouseInfo($item->text(), $data);
        }

        // ads without surface or description are discarded
        if (!isset($data["meters"]) || $data["meters"] == 0 || empty($data["description"])) {
            return false;
        }

        return $data;
    }
}
101 changes: 101 additions & 0 deletions App/Providers/Pisos.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
<?php
namespace Xupopter\Providers;

use Xupopter\System\Provider;
use Xupopter\System\IProvider;

/**
 * Crawler for pisos.com listings.
 *
 * Relies on getContent(), sendToDB(), strToNumber() and parseHouseInfo(),
 * which are not defined in this class (presumably inherited from Provider).
 */
class Pisos extends Provider implements IProvider
{
    private $domain = "http://www.pisos.com";

    // itemprop attributes copied verbatim into the result when present
    private $itemProps = [
        "postalCode",
        "latitude",
        "longitude",
    ];

    // url fragments that identify the site's default (placeholder) photos
    private $placeholderImages = [
        "nofoto_mini.jpg",
        "blank1x1.png",
        "Images/assets",
    ];

    /**
     * Fetches a listing page, then fetches, parses and stores every ad it links to.
     *
     * @param string $path path of the listing page, appended to the provider domain
     */
    public function crawl ($path)
    {
        $listing = $this->getContent($this->domain . $path);

        foreach ($listing->find('[itemprop="photo"] [itemprop="url"]') as $link)
        {
            $adPath = $link->attr("content");

            // without a path we would fetch the bare domain instead of an ad
            if (empty($adPath)) {
                continue;
            }

            $item = $this->parseItem($this->getContent($this->domain . $adPath));

            if ($item) {
                $this->sendToDB($item);
            }
        }
    }

    /**
     * Converts provider output to db's input format
     *
     * @param QueryPath $html parsed detail page of a single ad
     *
     * @return mixed (array/boolean) the ad data, or false when the ad has
     *               no surface
     */
    public function parseItem ($html)
    {
        $images = [];
        $data = [
            'title' => trim($html->find('h1.title')->text()),
            'description' => trim($html->find('.description')->text()),
            'price' => $this->strToNumber($html->find('.jsPrecioH1')->eq(0)->text()),
            'url' => $html->find('link[rel="canonical"]')->attr("href")
        ];

        foreach ($this->itemProps as $prop)
        {
            $propVal = $html->find('[itemprop="' . $prop . '"]')->attr("content");

            if (!empty($propVal)) {
                $data[$prop] = $propVal;
            }
        }

        // try to get the exact address
        $location = $html->find('[itemprop="streetAddress"]')->attr("content");

        if (empty($location)) {
            // fall back to the ad name without its "flat for sale/rent in" prefix
            $location = (string)$html->find('meta[itemprop="name"]')->attr("content");
            $location = str_replace(["Piso en venta en ", "Piso en alquiler en "], "", $location);
        }

        $location = trim((string)$location);
        $position = trim((string)$html->find('h2.position')->text());

        // only use the ", " separator when both parts are present
        if ($location !== "" && $position !== "") {
            $data['location'] = $location . ", " . $position;
        } else {
            $data['location'] = $location . $position;
        }

        // NOTE(review): parseHouseInfo() is expected to add fields such as
        // "meters" to $data (presumably taken by reference) - confirm.
        foreach ($html->find('.characteristics .item') as $item)
        {
            $this->parseHouseInfo($item->text(), $data);
        }

        // skip ads whose owner did not fill in the apartment meters
        if (!isset($data["meters"]) || $data["meters"] < 1) {
            return false;
        }

        /*
            from http://fotos.imghs.net/s/1030/129/1030_27926263129_1_2015112416580031250.jpg
            to http://fotos.imghs.net/xl/1030/129/1030_27926263129_1_2015112416580031250.jpg
        */
        foreach ($html->find("#basic img") as $img)
        {
            $src = $img->attr("src");

            // skip images without a source instead of storing an empty url
            if (empty($src)) {
                continue;
            }

            $image = str_replace(".net/s/", ".net/xl/", $src);

            // skip the default photos
            if ($this->isPlaceholderImage($image)) {
                continue;
            }

            $images[] = $image;
        }

        if (sizeof($images) > 0) {
            $data["images"] = $images;
        }

        return $data;
    }

    /**
     * Tells whether an image url points to one of the site's default photos.
     *
     * @param string $image image url
     *
     * @return boolean
     */
    private function isPlaceholderImage ($image)
    {
        foreach ($this->placeholderImages as $fragment) {
            if (strpos($image, $fragment) !== false) {
                return true;
            }
        }

        return false;
    }
}
Loading

0 comments on commit 73a3df6

Please sign in to comment.