From 2e1f632e822b93e498486ebe2f30131d10ab964b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Girault?=
Date: Fri, 6 Feb 2015 00:44:20 +0100
Subject: [PATCH] First version
---
.gitignore | 1 +
README.md | 68 ++++++++++++++++
composer.json | 23 ++++++
src/Nokogiri.php | 78 ++++++++++++++++++
src/Parser.php | 180 +++++++++++++++++++++++++++++++++++++++++
tests/NokogiriTest.php | 50 ++++++++++++
6 files changed, 400 insertions(+)
create mode 100644 .gitignore
create mode 100644 README.md
create mode 100644 composer.json
create mode 100644 src/Nokogiri.php
create mode 100644 src/Parser.php
create mode 100644 tests/NokogiriTest.php
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..61ead86
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/vendor
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0b83e1a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,68 @@
+Nokogiri
+========
+
+Cuts through XML like a breeze.
+
+Examples
+--------
+
+Given this XML:
+
+```xml
+
+ Lorem ipsum dolor sit amet.
+
+```
+
+Cutting it at the twentieth character...
+
+```php
+$Nokogiri = new Nokogiri\Nokogiri();
+$Nokogiri->cut($xml, 20);
+```
+
+Would return:
+
+```xml
+
+ Lorem ipsum dolor sit.
+
+```
+
+Cutting it at the eleventh character...
+
+```php
+$Nokogiri->cut($xml, 11);
+```
+
+Would return:
+
+```xml
+
+ Lorem ipsum
+
+```
+
+Note that the blank characters between tags are not taken into account.
+
+Pitfalls
+--------
+
+For now, Nokogiri won't work properly with strings that are not enclosed
+in a proper tag, like this:
+
+```xml
+
+ Some unenclosed string
+ Lorem ipsum
+
+```
+
+Notes
+-----
+
+The implementation is probably shitty, as I don't know anything about writing a
+decent parser...
+
+Also, the implementation of the parser itself is kind of tied to the class using
+it. It is obviously bad but it works :grin:
diff --git a/composer.json b/composer.json
new file mode 100644
index 0000000..0918955
--- /dev/null
+++ b/composer.json
@@ -0,0 +1,23 @@
+{
+ "name": "fg/nokogiri",
+ "description": "Cuts through XML like a breeze.",
+ "version": "0.1.0",
+ "authors": [{
+ "name": "Félix Girault",
+ "email": "felix.girault@gmail.com",
+ "homepage": "http://www.felix-girault.fr",
+ "role": "Developer"
+ }],
+ "keywords": [
+ "xml",
+ "cut",
+ "truncate"
+ ],
+ "license": "MIT",
+ "homepage": "http://github.com/felixgirault/essence",
+ "autoload": {
+ "psr-4": {
+ "Nokogiri\\": "src/"
+ }
+ }
+}
diff --git a/src/Nokogiri.php b/src/Nokogiri.php
new file mode 100644
index 0000000..63f0d6c
--- /dev/null
+++ b/src/Nokogiri.php
@@ -0,0 +1,78 @@
+on(
+ Parser::OPENED_TAG_EVENT,
+ function($tag) use (&$opened) {
+ $opened[] = $tag;
+ }
+ );
+
+ $Parser->on(
+ Parser::PARSED_TAG_CONTENTS_EVENT,
+ function($contents, $i) use (&$opened, &$count, &$position, $limit) {
+ if ($this->_isWhitespace($contents)) {
+ return;
+ }
+
+ $count += strlen($contents);
+
+ if ($count >= $limit) {
+ $position = $i - ($count - $limit);
+ return false;
+ }
+ }
+ );
+
+ $Parser->on(
+ Parser::CLOSED_TAG_EVENT,
+ function() use (&$opened) {
+ array_pop($opened);
+ }
+ );
+
+ $Parser->parse($xml);
+
+ return $position
+ ? $this->_enclose($xml, $position, $opened)
+ : $xml;
+ }
+
+ /**
+  * Tells whether a string consists solely of whitespace characters.
+  *
+  * @param string $string The string to test.
+  * @return bool True when the string is non-empty and contains nothing
+  *     but whitespace, false otherwise.
+  */
+ protected function _isWhitespace($string) {
+     // The pattern is anchored at both ends on purpose: an unanchored
+     // `\s+` also matches ordinary text that merely contains a space,
+     // and such text would then never be counted.
+     return preg_match('~\A\s+\z~', $string) === 1;
+ }
+
+ /**
+  * Truncates the XML at a byte offset and closes the tags left open.
+  *
+  * @param string $xml The complete XML string.
+  * @param int $position Number of leading bytes of $xml to keep.
+  * @param array $tags Names of the tags still open at $position, ordered
+  *     from outermost to innermost.
+  * @return string The truncated XML followed by one closing tag per name.
+  */
+ protected function _enclose($xml, $position, $tags) {
+     $xml = substr($xml, 0, $position);
+
+     // The innermost tag must be closed first, so walk the stack backwards.
+     // Iterating (rather than popping until a falsy value shows up) makes
+     // sure every remaining tag gets closed, whatever its name.
+     foreach (array_reverse($tags) as $tag) {
+         $xml .= "</$tag>";
+     }
+
+     return $xml;
+ }
+}
diff --git a/src/Parser.php b/src/Parser.php
new file mode 100644
index 0000000..d5bb86c
--- /dev/null
+++ b/src/Parser.php
@@ -0,0 +1,180 @@
+_observers[$event] = $callback;
+ }
+
+ /**
+  * Notifies the observer registered for an event, if there is one.
+  *
+  * Every argument passed after the event is forwarded to the observer.
+  * The continue flag is set to false when the observer returns exactly
+  * false, and to true for any other return value. Events without an
+  * observer leave the flag untouched.
+  *
+  * @param mixed $event Identifier of the event to emit.
+  */
+ protected function _emit($event) {
+     if (!isset($this->_observers[$event])) {
+         return;
+     }
+
+     // everything but the event identifier itself goes to the observer
+     $arguments = array_slice(func_get_args(), 1);
+     $observer = $this->_observers[$event];
+     $result = call_user_func_array($observer, $arguments);
+
+     $this->_continue = ($result !== false);
+ }
+
+ /**
+  * Feeds a string to the state machine, one byte at a time.
+  *
+  * Each byte is handed to the handler matching the current state. The
+  * loop ends at the end of the string, or right after the first byte
+  * whose handling leaves the continue flag set to false.
+  *
+  * @param string $xml The XML string to parse.
+  */
+ public function parse($xml) {
+     $length = strlen($xml);
+     $offset = 0;
+
+     while ($offset < $length) {
+         $byte = $xml[$offset];
+         $state = $this->_state;
+
+         // loose comparisons, exactly as a switch statement would do
+         if ($state == self::PARSING_OPENING_TAG) {
+             $this->_parseOpeningTag($byte);
+         } elseif ($state == self::PARSING_TAG_ATTRIBUTES) {
+             $this->_parseTagAttributes($byte);
+         } elseif ($state == self::PARSING_TAG_CONTENTS) {
+             $this->_parseTagContents($byte, $offset);
+         } elseif ($state == self::PARSING_CLOSING_TAG) {
+             $this->_parseClosingTag($byte);
+         }
+
+         // the flag is only checked once the byte has been handled
+         if (!$this->_continue) {
+             break;
+         }
+
+         $offset++;
+     }
+ }
+
+ /**
+  * Handles one character read while inside an opening tag.
+  *
+  * Characters are accumulated as the tag name until a delimiter is met;
+  * OPENED_TAG_EVENT is then emitted with that name and the parser moves
+  * on to the state matching the delimiter.
+  *
+  * @param string $char The character being parsed.
+  */
+ protected function _parseOpeningTag($char) {
+     switch ($char) {
+         // we're in fact parsing a closing tag (`</p>`), or reaching the
+         // end of a self-closing one (`<br/>`)
+         case '/':
+             $this->_state = self::PARSING_CLOSING_TAG;
+
+             // a name was collected before the slash: the tag is
+             // self-closing and must be reported as opened, so that the
+             // upcoming CLOSED_TAG_EVENT matches it rather than its parent,
+             // and so that its name doesn't leak into the next tag's name
+             if ((string) $this->_tagName !== '') {
+                 $this->_emit(self::OPENED_TAG_EVENT, $this->_tagName);
+                 $this->_tagName = '';
+             }
+             break;
+
+         // we're beginning to parse attributes
+         // we know the name of the tag we just opened
+         // (any XML whitespace may separate the name from the attributes)
+         case ' ':
+         case "\t":
+         case "\n":
+         case "\r":
+             $this->_state = self::PARSING_TAG_ATTRIBUTES;
+             $this->_emit(self::OPENED_TAG_EVENT, $this->_tagName);
+             $this->_tagName = '';
+             break;
+
+         // we're reaching the end of an opening tag
+         // we know the name of the tag we just opened
+         // we can begin to store the tag's contents for later use
+         case '>':
+             $this->_state = self::PARSING_TAG_CONTENTS;
+             $this->_emit(self::OPENED_TAG_EVENT, $this->_tagName);
+             $this->_tagName = '';
+             break;
+
+         // we're storing the tag's name for later use
+         default:
+             $this->_tagName .= $char;
+             break;
+     }
+ }
+
+ /**
+  * Handles one character read while inside the attributes of a tag.
+  *
+  * Only the closing `>` is meaningful here; any other character
+  * (attribute names and values, quotes, slashes...) is skipped.
+  *
+  * @param string $char The character being parsed.
+  */
+ protected function _parseTagAttributes($char) {
+     if ($char !== '>') {
+         return;
+     }
+
+     // the opening tag is over: start collecting its contents afresh
+     $this->_contents = '';
+     $this->_state = self::PARSING_TAG_CONTENTS;
+ }
+
+ /**
+  * Handles one character read while inside the contents of a tag.
+  *
+  * Text is buffered until a `<` is met; PARSED_TAG_CONTENTS_EVENT is then
+  * emitted with the buffered text and the offset of that `<`.
+  *
+  * @param string $char The character being parsed.
+  * @param int $i Offset of the character in the parsed string.
+  */
+ protected function _parseTagContents($char, $i) {
+     if ($char !== '<') {
+         // plain text: keep buffering it
+         $this->_contents .= $char;
+         return;
+     }
+
+     // a tag starts here: hand over what was buffered so far, then start
+     // again with an empty buffer
+     $this->_state = self::PARSING_OPENING_TAG;
+     $this->_emit(self::PARSED_TAG_CONTENTS_EVENT, $this->_contents, $i);
+     $this->_contents = '';
+ }
+
+ /**
+  * Handles one character read while inside a closing tag.
+  *
+  * The name of the tag is not read: everything is skipped until the
+  * final `>`, at which point CLOSED_TAG_EVENT is emitted without any
+  * argument and the parser goes back to reading contents.
+  *
+  * @param string $char The character being parsed.
+  */
+ protected function _parseClosingTag($char) {
+     if ($char === '>') {
+         $this->_state = self::PARSING_TAG_CONTENTS;
+         $this->_emit(self::CLOSED_TAG_EVENT);
+     }
+ }
+}
diff --git a/tests/NokogiriTest.php b/tests/NokogiriTest.php
new file mode 100644
index 0000000..346c2fa
--- /dev/null
+++ b/tests/NokogiriTest.php
@@ -0,0 +1,50 @@
+Nokogiri = new Nokogiri\Nokogiri();
+ }
+
+ /**
+  * Checks that cutting at 12 characters keeps the first two "First"
+  * texts whole (5 characters each) and only the first 2 characters of
+  * the third one.
+  */
+ public function testCut() {
+ $xml = <<
+ First
+ First
+
+ First
+ First
+
+
+XML;
+
+ $expected = <<
+ First
+ First
+
+ Fi
+XML;
+
+ $this->assertEquals(
+ $expected,
+ $this->Nokogiri->cut($xml, 12)
+ );
+ }
+}