Skip to content

Commit

Permalink
Improved harvesting and added card class
Browse files Browse the repository at this point in the history
  • Loading branch information
SteGriff committed Sep 23, 2013
1 parent 949de19 commit 728eb2a
Show file tree
Hide file tree
Showing 6 changed files with 224 additions and 85 deletions.
44 changes: 19 additions & 25 deletions DAL.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@ function sqlString($s, $db){
return trim($db->real_escape_string($s));
}

function DB_create_card($cardObject, $db){
$name = $cardObject["name"];
$mana_cost = ( isset($cardObject["mana_cost"]) ? $cardObject["mana_cost"] : null);
$converted_mana_cost = ( isset($cardObject["converted_mana_cost"]) ? $cardObject["converted_mana_cost"] : null);
$types = $cardObject["types"];
$card_text = $cardObject["card_text"];
$flavor_text = ( isset($cardObject["flavor_text"]) ? $cardObject["flavor_text"] : null );
$power_toughness = ( isset($cardObject["power_toughness"]) ? $cardObject["power_toughness"] : null);
$expansion = $cardObject["expansion"];
$rarity = $cardObject["rarity"];
$card_number = $cardObject["card_number"];
$artist = $cardObject["artist"];
/**
 * Insert a card object into the database.
 *
 * Reads each column value from the card and hands them, in column order,
 * to DB_insert_card. Properties that a card may legitimately lack are
 * passed as null when the card does not have them.
 *
 * @param card   $card Card object exposing has() and get().
 * @param mysqli $db   Open database connection, passed through to DB_insert_card.
 * @return mixed Whatever DB_insert_card returns.
 */
function DB_create_card($card, $db){
	// Fetch a property only when the card has it; otherwise use null.
	$optional = function ($property) use ($card) {
		if ($card->has($property)){
			return $card->get($property);
		}
		return null;
	};

	return DB_insert_card(
		$card->get('name'),
		$optional('mana_cost'),
		$optional('converted_mana_cost'),
		$card->get('types'),
		$optional('card_text'),
		$optional('flavor_text'),
		$optional('power_toughness'),
		$card->get('expansion'),
		$card->get('rarity'),
		$card->get('card_number'),
		$card->get('artist'),
		$db
	);
}

Expand Down Expand Up @@ -60,26 +60,20 @@ function SQL_create_card($name, $mana_cost, $converted_mana_cost, $types, $card_
return $SQL;
}

function DB_card_exists($name, $db){
$result = $db->query( SQL_card_exists($name, $db) );
function DB_existing_card($name, $db){
$result = $db->query( SQL_existing_card($name, $db) );

switch ($result->num_rows){
case 0:
return false;
return null;
break;
default:
$fetchedCard = (array) $result->fetch_object();
$fetchedCard["ID"] = "";
$cardObject = array();
foreach ($fetchedCard as $key => $value){
if ($value){
$cardObject[$key] = $value;
}
}
return $cardObject;
$fetchedArray = (array) $result->fetch_object();
$card = new card($fetchedArray);
return $card;
}
}
function SQL_card_exists($name, $db){
function SQL_existing_card($name, $db){
global $CardTable;
$name = sqlString($name, $db);
return "Select * from $CardTable
Expand Down
168 changes: 131 additions & 37 deletions card_extractors.php
Original file line number Diff line number Diff line change
@@ -1,47 +1,139 @@
<?php
/**
 * Convert one line of card HTML into plain text.
 *
 * - Boundaries between adjacent "cardtextbox" divs become " _ ".
 * - Icon images are replaced by their alt text wrapped in braces, and the
 *   colour names are shortened (for example {Blue} becomes {U}).
 * - All remaining tags and the surrounding whitespace are removed.
 *
 * @param string $l Raw HTML line.
 * @return string The plain-text content of the line.
 */
function get_line_content($l){
	//cardtextbox divs represent separate lines, which we'll represent with underscores.
	// The quotes must not be backslash-escaped here: inside a single-quoted string
	// \" keeps its backslash, and the pattern would never match real HTML.
	$v = str_replace('</div><div class="cardtextbox">', ' _ ', $l);

	//Replace all images of icons with their alt text
	$v = preg_replace('#<img[ a-zA-Z0-9/.?=&;"]+alt="([\w ]+)"[ a-zA-Z0-9/.?=&;"]+>#', '{$1}', $v);

	//Replace colour icons with a shorter version {Blue} -> {U}
	// but only if we can find a brace in the text (this is a mild optimisation)
	if (strpos($v, '{') !== false){
		$v = str_replace(
			array('{Blue}', '{Black}', '{White}', '{Green}', '{Red}', '{Variable Colorless}'),
			array('{U}', '{B}', '{W}', '{G}', '{R}', '{X}'),
			$v
		);
	}

	//Strip out remaining tags and destroy whitespace
	return trim(strip_tags($v));
}

/**
 * Replace every carriage return in a line with a single space.
 *
 * @param string $l Text that may contain "\r" characters.
 * @return string The text with each "\r" swapped for ' '.
 */
function tidy_line($l){
	$carriageReturn = "\r";
	$replacement = ' ';
	$tidied = str_replace($carriageReturn, $replacement, $l);
	return $tidied;
}

/**
 * Look a card up on Gatherer by name and return it as a card object.
 *
 * The search page is downloaded once. If Gatherer answered with a card page,
 * that page is parsed directly. If it answered with a "Card Search" results
 * page, the result count is read; with at least one result the list is
 * searched for a link whose text is exactly the requested name and that
 * card's detail page is downloaded and parsed.
 *
 * Failures are reported through the card's error property, never by
 * returning a different type.
 *
 * @param string $name_search Card name as typed by the user (not url-encoded).
 * @return card Card populated with data, or carrying an error message.
 */
function download_card($name_search){

	$card = new card(null);
	$strangeResults = 'The card search results look strange. Can not return a card.';

	// Only the URL needs the encoded form; link text on the results page
	// holds the plain name, so the raw value is kept for matching below.
	$encodedName = urlencode($name_search);
	$url = "http://gatherer.wizards.com/Pages/Search/Default.aspx?name=+[{$encodedName}]";
	$lines = download_page($url);
	$lineCount = count($lines);

	//Loop through lines of the html until the line contains "</title>"
	// (bounded, so a failed or unexpected download can not loop forever)
	$i = 2;
	while ($i < $lineCount && strpos($lines[$i], '</title>') === false){
		$i++;
	}
	if ($i >= $lineCount){
		$card->set_error($strangeResults);
		return $card;
	}

	//Now $lines[$i-1] contains the page title.
	// If the page title has "Card Search" there may be a single matching card,
	// for example "Forest", or it may be multiple results
	if (strpos($lines[$i-1], 'Card Search') === false){
		//Just a normal card page: parse the download
		return add_data_from_lines($card, $lines);
	}

	//Look for the searchTermDisplay and the number of results in it
	// (Use the existing instance of $i to save time);
	while ($i < $lineCount && strpos($lines[$i], 'searchTermDisplay">') === false){
		$i++;
	}
	if ($i >= $lineCount){
		$card->set_error($strangeResults);
		return $card;
	}
	$l = $lines[$i];

	//Number of results is in brackets on this line, like (7)
	// Test the strpos results themselves: adding 1 first would turn false into 1.
	$openBracket = strpos($l, '(');
	$closeBracket = ($openBracket === false) ? false : strpos($l, ')', $openBracket);
	if ($openBracket === false || $closeBracket === false){
		$card->set_error($strangeResults);
		return $card;
	}

	$numberOfResults = (int) substr($l, $openBracket + 1, $closeBracket - $openBracket - 1);
	if ($numberOfResults === 0){
		$card->set_error('No cards match that search.');
		return $card;
	}

	//More than zero results - let's try to find an exact card.
	$mvid = matching_multiverse_id($name_search, $lines);
	if ($mvid){
		$lines = download_page("http://gatherer.wizards.com/Pages/Card/Details.aspx?multiverseid=$mvid");
		$card = add_data_from_lines($card, $lines);
	}
	else{
		$card->set_error('Multiple options');
	}
	return $card;
}

/**
 * Find the multiverse id of the search result whose link text is exactly $name.
 *
 * Lines are scanned from index 200 up to and including the line 30 from the
 * end of the document. The id is read from the "?multiverseid=" query
 * parameter on the matching line, up to the next double quote.
 *
 * @param string $name  Exact card name as it appears in the link text.
 * @param array  $lines Lines of the search results page.
 * @return string|false The multiverse id, or false when no line links to that
 *                      exact name or the matching line carries no readable id.
 */
function matching_multiverse_id($name, $lines){
	$firstLine = 200;  // everything before this is treated as page header
	$tailLines = 30;   // the last lines of the document are not searched
	$marker = '?multiverseid=';
	$linkText = ">$name</a>";
	$lineCount = count($lines);

	for ($i = $firstLine; $i < $lineCount; $i++){
		$l = $lines[$i];

		if (strpos($l, $linkText) !== false){
			$mvidStart = strpos($l, $marker);
			if ($mvidStart === false){ return false; }

			$mvidStart += strlen($marker);
			$mvidEnd = strpos($l, '"', $mvidStart);
			//No closing quote: the id can not be delimited reliably
			if ($mvidEnd === false){ return false; }

			return substr($l, $mvidStart, $mvidEnd - $mvidStart);
		}

		//Reached the tail of the document without a match.
		// Stop here so an unrelated line is never parsed for an id.
		if ($i >= $lineCount - $tailLines){ break; }
	}

	return false;
}

/**
 * Fetch a page and split it into lines.
 *
 * The content is read with file_get_contents and split on PHP_EOL.
 *
 * @param string $url Location to read.
 * @return array The lines of the page.
 */
function download_page($url){
	return explode(PHP_EOL, file_get_contents($url));
}

function add_data_from_lines($card, $lines){

//var_dump($lines);

//The key we found on this iteration, and for which we seek a value on the next
$flag = null;
//Boolean - is that value entirely HTML?
$htmlFlag = false;

//Boolean - does this non-HTML line need tidying/sanitising?
$untidy = false;

$found_result = false;

$i = -1;
foreach( $lines as $line ) {
foreach($lines as $line) {
$i++;

//All junk before 300 lines and after 500th line.
if ($i < 300){ continue; }
//All junk before 300th and after 500th line.
if ($i < 300){ continue; }
elseif ($i > 500){ break; }

//Get the line with no markup or space
$line = trim( strip_tags($line) );
$line = trim(strip_tags($line));

//If it still has content
if ($line){
//If we flagged this line up as having a value, put it in the object.
if ($flag){
$cardObject[$flag] = $line;
if ($untidy){
$card->set($flag, tidy_line($line));
}
else{
$card->set($flag, $line);
}

//And clear the flag, ready for next.
$flag = null;
$untidy = false;
$htmlFlag = false;
//We have found a result by now.
$found_result = true;
Expand All @@ -50,44 +142,45 @@ function download_card($name_search){
//Normally not an html value, so default to false.
$htmlFlag = false;
switch ($line){
case "Card Name:":
$flag = "name";
case 'Card Name:':
$flag = 'name';
break;
case "Mana Cost:":
$flag = "mana_cost";
case 'Mana Cost:':
$flag = 'mana_cost';
$htmlFlag = true;
break;
case "Converted Mana Cost:":
$flag = "converted_mana_cost";
case 'Converted Mana Cost:':
$flag = 'converted_mana_cost';
break;
case "Types:":
$flag="types";
case 'Types:':
$flag='types';
break;
case "Card Text:":
case 'Card Text:':
//Plain text with interspersed icons (maybe), treat as HTML
$flag = "card_text";
$flag = 'card_text';
$htmlFlag = true;
break;
case "Flavor Text:":
$flag = "flavor_text";
case 'Flavor Text:':
$flag = 'flavor_text';
$untidy = true;
break;
case "Watermark:":
$flag = "watermark";
case 'Watermark:':
$flag = 'watermark';
break;
case "P/T:":
$flag = "power_toughness";
case 'P/T:':
$flag = 'power_toughness';
break;
case "Expansion:":
$flag = "expansion";
case 'Expansion:':
$flag = 'expansion';
break;
case "Rarity:":
$flag = "rarity";
case 'Rarity:':
$flag = 'rarity';
break;
case "Card Number:":
$flag = "card_number";
case 'Card Number:':
$flag = 'card_number';
break;
case "Artist:":
$flag = "artist";
case 'Artist:':
$flag = 'artist';
break;
}
}
Expand All @@ -101,7 +194,7 @@ function download_card($name_search){
// They fall after an opening <div="value"> tag, so we skip that line (+1)
$l = $lines[$i+1];

$cardObject[$flag] = get_line_content($l);
$card->set($flag, get_line_content($l));
//echo " >>>> VALUE HTML [$value] ----- \n";
$flag=null;
$htmlFlag=false;
Expand All @@ -111,9 +204,10 @@ function download_card($name_search){
//Done parsing every line of the page.
// Check if we found anything, otherwise it wasn't a card page.
if (!$found_result){
$cardObject["error"] = "No card with that name";
$card->set_error('No card with that name');
}

return $cardObject;
return $card;
}

?>
47 changes: 47 additions & 0 deletions card_object.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
<?php
/**
 * A single card, stored as a bag of named properties, plus an optional
 * error message kept under the 'error' key.
 */
class card
{
	//An associative array representing a single card
	// The keys are card properties such as flavor_text
	// (remains null until the first property or error is set)
	private $c;

	/**
	 * Initialise to db object (or just pass in null).
	 *
	 * Empty values (null, '', false, empty array) are skipped so that has()
	 * reports them as absent. Numeric zero values such as '0' are real data
	 * and are kept.
	 *
	 * @param array|null $fetchedArray Property name => value pairs, or null.
	 */
	public function __construct($fetchedArray) {
		$this->c = null;
		if (isset($fetchedArray)){
			foreach ($fetchedArray as $key => $value){
				// Plain truthiness would also discard 0 and '0'.
				if ($value || is_numeric($value)){
					$this->set($key, $value);
				}
			}
		}
		$this->clear_error();
	}

	/**
	 * Store a property value, replacing any previous value.
	 */
	public function set($property, $value){
		$this->c[$property] = $value;
	}

	/**
	 * Read a property value.
	 *
	 * @return mixed The stored value, or null when the property is not set.
	 */
	public function get($property){
		// Guarded so that reading an absent property does not raise a notice.
		return $this->has($property) ? $this->c[$property] : null;
	}

	/**
	 * @return bool True when the property is set to a non-null value.
	 */
	public function has($property){
		return isset($this->c[$property]);
	}

	/**
	 * @return string The properties (including any error) encoded as JSON.
	 */
	public function json(){
		return json_encode($this->c);
	}

	/**
	 * Record an error message. The first error wins: once one is stored,
	 * later calls are ignored until clear_error() is called.
	 */
	public function set_error($e){
		if ($this->no_error()){
			$this->c['error'] = $e;
		}
	}

	/**
	 * Remove any stored error message.
	 */
	public function clear_error(){
		unset($this->c['error']);
	}

	/**
	 * @return bool True when no (non-empty) error message is stored.
	 */
	public function no_error(){
		return empty($this->c['error']);
	}

}
?>
4 changes: 2 additions & 2 deletions create_card_table.sql
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
create table mtg_cards (
ID SERIAL,
primary key(ID),
name varchar(50) not null,
name varchar(141) not null,
mana_cost varchar(50),
converted_mana_cost int,
types varchar(50) not null,
card_text varchar(255) not null,
card_text varchar(500) not null,
flavor_text varchar(255),
power_toughness varchar(10),
expansion varchar(50),
Expand Down
5 changes: 5 additions & 0 deletions database_updates.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Bring an existing mtg_cards table in line with create_card_table.sql.

-- Card names may be up to 141 characters long.
alter table mtg_cards
modify column name varchar(141) not null;

-- Card text may be up to 500 characters long.
alter table mtg_cards
modify column card_text varchar(500) not null;
Loading

0 comments on commit 728eb2a

Please sign in to comment.