Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lists, headings, todos + tests working #15

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .eggs/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
This directory contains eggs that were downloaded by setuptools to build, test, and run plug-ins.

This directory caches those eggs to prevent repeated downloads.

However, it is safe to delete this directory.

44 changes: 39 additions & 5 deletions htmlslacker/htmlslacker.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
except ImportError:
from HTMLParser import HTMLParser
from htmlentitydefs import name2codepoint
import re

LINEBR = "::LINEBR::"

Expand All @@ -23,6 +24,9 @@ def __init__(self, html, *args, **kwargs):
except TypeError:
HTMLParser.__init__(self, *args, **kwargs)
self.skip = False
self.isProcessingList = False
self.isProcessingOrderedList = False
self.orderedNumber = 0

# slackified string
self.output = ''
Expand All @@ -43,9 +47,11 @@ def handle_starttag(self, tag, attrs):
if tag == 'br' or tag == 'p':
self.output += LINEBR
if tag == 'b' or tag == 'strong':
self.output += '*'
self.output += ' *'
if re.match("h[1-6]{1}", tag):
self.output += ' *'
if tag == 'i' or tag == 'em':
self.output += '_'
self.output += ' _'
if tag == 'code':
self.output += '`'
if tag == 'a':
Expand All @@ -55,6 +61,16 @@ def handle_starttag(self, tag, attrs):
self.output += attr[1] + '|'
if tag == 'style' or tag == 'script':
self.skip = True
if tag == 'ul':
self.isProcessingList = True
if tag == 'li' and self.isProcessingList:
self.output += '• '
if tag == 'ol':
self.orderedNumber = 1
self.isProcessingOrderedList = True
if tag == 'li' and self.isProcessingOrderedList:
self.output += '{}. '.format(self.orderedNumber)
self.orderedNumber = self.orderedNumber + 1

def handle_endtag(self, tag):
"""
Expand All @@ -63,15 +79,25 @@ def handle_endtag(self, tag):
:return:
"""
if tag == 'b' or tag == 'strong':
self.output += '*'
self.output += '* '
if re.match("h[1-6]{1}", tag):
self.output += '* '+LINEBR
if tag == 'i' or tag == 'em':
self.output += '_'
self.output += '_ '
if tag == 'a':
self.output += '>'
if tag == 'code':
self.output += '`'
if tag == 'style' or tag == 'script':
self.skip = False
if tag == 'ul':
self.isProcessingList = False
if tag == 'li' and self.isProcessingList:
self.output += LINEBR
if tag == 'ol':
self.isProcessingOrderedList = False
if tag == 'li' and self.isProcessingOrderedList:
self.output += LINEBR

def handle_data(self, data):
"""
Expand Down Expand Up @@ -105,4 +131,12 @@ def get_output(self):
link: https://stackoverflow.com/questions/2077897/substitute-multiple-whitespace-with-single-whitespace-in-python
:return:
"""
return ' '.join(self.output.split()).replace(LINEBR, "\n")
output = self.output
output = re.sub(r'\*(\s\*)+', '*', output)
output = re.sub(r'_( _)+', '_', output)
output = output.replace('[] ', '☐ ').replace('[x] ', '☑︎ ')
output = ' '.join(output.split())
output = output.replace(LINEBR, "\n")
output = re.sub(r' *\n *', '\n', output)
output = output.strip()
return output
36 changes: 35 additions & 1 deletion test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def test_example_1():
link in a paragraph!</a>
</p>
"""
expected = "*Hello*\n There is _something_ interesting about `this doc` \n And <http://example.com/|here is a link in a paragraph!>"
expected = "*Hello*\nThere is _something_ interesting about `this doc`\nAnd <http://example.com/|here is a link in a paragraph!>"
output = HTMLSlacker(html).get_output()
assert(output == expected)

Expand All @@ -35,3 +35,37 @@ def test_link_with_target():
expected = "Please click <http://xxx.com/t.html|here>"
output = HTMLSlacker(html).get_output()
assert(output == expected)

def test_unordered_list():
    """A ``<ul>`` is expected to render each ``<li>`` as a bullet-prefixed line."""
    markup = 'Here is my cool list <ul><li>The Shining</li><li>Memento</li><li>Blade Runner</li></ul>'
    rendered = HTMLSlacker(markup).get_output()
    # Items are separated by newlines; the last item has no trailing newline.
    assert rendered == 'Here is my cool list • The Shining\n• Memento\n• Blade Runner'

def test_ordered_list():
    """An ``<ol>`` is expected to render each ``<li>`` with a 1-based number prefix."""
    markup = 'Here is my cool list <ol><li>The Shining</li><li>Memento</li><li>Blade Runner</li></ol>'
    wanted = 'Here is my cool list 1. The Shining\n2. Memento\n3. Blade Runner'
    converter = HTMLSlacker(markup)
    assert converter.get_output() == wanted

def test_unordered_list_with_text_modifications():
    """Check that inline bold markup inside a list item survives list rendering.

    The last ``<li>`` wraps one word in ``<b>``; the expected output keeps the
    bullet prefix and renders that word between ``*`` markers.
    """
    html = 'Here is my cool list <ul><li>The Shining</li><li>Memento</li><li>Blade <b>Runner</b></li></ul>'
    expected = 'Here is my cool list • The Shining\n• Memento\n• Blade *Runner*'
    # The original test stopped after building its fixtures, so it could never
    # fail. Run the conversion and compare, as the sibling tests do.
    output = HTMLSlacker(html).get_output()
    assert output == expected

def test_headers_rendered():
    """Headings ``h2`` are expected to render bold; the unknown ``h7`` tag as plain text.

    The input separates the elements with spaces.
    """
    markup = '''<h2>Hello</h2> <h7>new</h7> <h2><b>world</b></h2>'''
    rendered = HTMLSlacker(markup).get_output()
    # A bold tag nested in a heading should yield a single pair of '*' markers.
    assert rendered == "*Hello*\nnew *world*"

def test_headers_rendered_no_spaces():
    """Same expectation as the spaced heading case, with no whitespace between tags."""
    markup = '''<h2>Hello</h2><h7>new</h7><h2><b>world</b></h2>'''
    wanted = "*Hello*\nnew *world*"
    converter = HTMLSlacker(markup)
    assert converter.get_output() == wanted

def test_task_list_rendered():
    """``[] `` and ``[x] `` markers are expected to become checkbox glyphs, one task per line."""
    markup = '''[] Grocery<br>[x] Laundary'''
    rendered = HTMLSlacker(markup).get_output()
    # The <br> between the two tasks is expected to become a newline.
    assert rendered == "☐ Grocery\n☑︎ Laundary"