From aed0fd8045d7cb3052c982d78df3956568bfd2ac Mon Sep 17 00:00:00 2001 From: James Date: Sun, 26 Jul 2015 15:53:01 +0100 Subject: [PATCH] Issue #1 - example URLs Issue #2 - event in both formats --- .../HTMLIsAnEvent/Parser.php | 75 ++++++++++--------- .../HTMLIsAnEvent/BothTest.php | 21 +----- 2 files changed, 42 insertions(+), 54 deletions(-) diff --git a/src/JMBTechnologyLimited/HTMLIsAnEvent/Parser.php b/src/JMBTechnologyLimited/HTMLIsAnEvent/Parser.php index 8702934..b98ae87 100644 --- a/src/JMBTechnologyLimited/HTMLIsAnEvent/Parser.php +++ b/src/JMBTechnologyLimited/HTMLIsAnEvent/Parser.php @@ -29,8 +29,12 @@ function __construct($html, $url) $dom = new Dom(); $dom->load($html, array( 'strict' => false )); + $idsOfRootElements = array(); + foreach($dom->find('[itemtype="http://schema.org/Event"]') as $node) { + $idsOfRootElements[] = $node->id(); + $event = new Event(); //$places = $node->find('[itemtype="http://schema.org/Place"]'); @@ -86,53 +90,56 @@ function __construct($html, $url) foreach($dom->find('.h-event') as $node) { - $event = new Event(); + if (!in_array($node->id(), $idsOfRootElements)) { - $locations = $node->find(".p-location"); - foreach($locations as $location) { - $location->getParent()->removeChild($location->id()); - } + $event = new Event(); + $locations = $node->find(".p-location"); + foreach($locations as $location) { + $location->getParent()->removeChild($location->id()); + } - $nameContents = $node->find('.p-name'); - if ($nameContents->count() > 0) { - $event->setTitle(html_entity_decode($nameContents[0]->text(true))); - } - $urlContents = $node->find('.u-url a, a.u-url'); - if ($urlContents->count() > 0) { - foreach($urlContents as $urlContent) { - $event->addUrl(new URL(html_entity_decode($urlContent->getAttribute("href")))); + $nameContents = $node->find('.p-name'); + if ($nameContents->count() > 0) { + $event->setTitle(html_entity_decode($nameContents[0]->text(true))); } - } - $startContents = $node->find('time.dt-start'); - if ($startContents->count() > 0) { - if ($startContents[0]->getAttribute("datetime")) { - $event->setStart(new \DateTime($startContents[0]->getAttribute("datetime"), new \DateTimeZone("UTC"))); - } else if ($startContents[0] instanceof Dom\HtmlNode && $startContents[0]->text(true)) { - $event->setStart(new \DateTime($startContents[0]->text(true), new \DateTimeZone("UTC"))); + $urlContents = $node->find('.u-url a, a.u-url'); + if ($urlContents->count() > 0) { + foreach($urlContents as $urlContent) { + $event->addUrl(new URL(html_entity_decode($urlContent->getAttribute("href")))); + } } - } - $endContents = $node->find('time.dt-end'); - if ($endContents->count() > 0) { - if ($endContents[0]->getAttribute("datetime")) { - $event->setEnd(new \DateTime($endContents[0]->getAttribute("datetime"), new \DateTimeZone("UTC"))); - } else if ($endContents[0] instanceof Dom\HtmlNode && $endContents[0]->text(true)) { - $event->setEnd(new \DateTime($endContents[0]->text(true), new \DateTimeZone("UTC"))); + $startContents = $node->find('time.dt-start'); + if ($startContents->count() > 0) { + if ($startContents[0]->getAttribute("datetime")) { + $event->setStart(new \DateTime($startContents[0]->getAttribute("datetime"), new \DateTimeZone("UTC"))); + } else if ($startContents[0] instanceof Dom\HtmlNode && $startContents[0]->text(true)) { + $event->setStart(new \DateTime($startContents[0]->text(true), new \DateTimeZone("UTC"))); + } } - } - $descriptionContents = $node->find('.p-description'); - if ($descriptionContents->count() > 0) { - $event->setDescriptionText(html_entity_decode($descriptionContents[0]->text(true))); - $event->setDescriptionHTML(html_entity_decode($descriptionContents[0]->innerHtml())); - } + $endContents = $node->find('time.dt-end'); + if ($endContents->count() > 0) { + if ($endContents[0]->getAttribute("datetime")) { + $event->setEnd(new \DateTime($endContents[0]->getAttribute("datetime"), new \DateTimeZone("UTC"))); + } else if ($endContents[0] instanceof Dom\HtmlNode && $endContents[0]->text(true)) { + $event->setEnd(new \DateTime($endContents[0]->text(true), new \DateTimeZone("UTC"))); + } + } + + $descriptionContents = $node->find('.p-description'); + if ($descriptionContents->count() > 0) { + $event->setDescriptionText(html_entity_decode($descriptionContents[0]->text(true))); + $event->setDescriptionHTML(html_entity_decode($descriptionContents[0]->innerHtml())); + } - $this->events[] = $event; + $this->events[] = $event; + } } diff --git a/tests/JMBTechnologyLimited/HTMLIsAnEvent/BothTest.php b/tests/JMBTechnologyLimited/HTMLIsAnEvent/BothTest.php index 18f3eef..02da111 100644 --- a/tests/JMBTechnologyLimited/HTMLIsAnEvent/BothTest.php +++ b/tests/JMBTechnologyLimited/HTMLIsAnEvent/BothTest.php @@ -19,7 +19,7 @@ function testFile1() { $events = $parser->getEvents(); - $this->assertEquals(2, count($events)); + $this->assertEquals(1, count($events)); ############################### Event @@ -43,25 +43,6 @@ function testFile1() { $this->assertNull($event1->getDescriptionHtml()); $this->assertNull($event1->getDescriptionText()); - ############################### Event - - $event2 = $events[1]; - - $this->assertEquals("IndieWebCamp 2015",$event2->getTitle()); - $this->assertEquals(1, $event2->getUrlsCount()); - $this->assertEquals("http://indiewebcamp.com/2015",$event2->getUrls()[0]->getUrl()); - - - $this->assertNotNull($event2->getStart()); - $this->assertEquals("2015-07-11T09:30:00+00:00",$event2->getStart()->format("c")); - $this->assertEquals("UTC",$event2->getStart()->getTimezone()->getName()); - - $this->assertNotNull($event2->getEnd()); - $this->assertEquals("2015-07-12T18:00:00+00:00",$event2->getEnd()->format("c")); - $this->assertEquals("UTC",$event2->getEnd()->getTimezone()->getName()); - - $this->assertNull($event2->getDescriptionHtml()); - $this->assertNull($event2->getDescriptionText()); } }