Skip to content

Commit

Permalink
Add text_rich()
Browse files Browse the repository at this point in the history
  • Loading branch information
deedy5 committed Aug 1, 2024
1 parent eb8a14d commit 6142560
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 10 deletions.
32 changes: 27 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ Python binding to the rust [rust-html2text](https://github.com/jugglerchris/rust
- [Installation](#installation)
- [Usage](#usage)
- [text_markdown()](#1-text_markdown)
- [text_plain()](#2-text_plain)
- [text_plain()](#2-text_plain)
- [text_rich()](#3-text_rich)

## Installation

Expand All @@ -29,12 +30,12 @@ def text_markdown(html: str, width: int = 100):
"""
```
*example*
example:
```python
import html2text_rs
import requests

resp = requests.get("https://tmz.com")
resp = requests.get("https://en.wikipedia.org/wiki/AGM-88_HARM")

text_markdown = html2text_rs.text_markdown(resp.text)
print(text_markdown)
Expand All @@ -50,13 +51,34 @@ def text_plain(html: str, width: int = 100):
"""
```
*example*
example:
```python
import html2text_rs
import requests

resp = requests.get("https://tmz.com")
resp = requests.get("https://en.wikipedia.org/wiki/AGM-88_HARM")

text_plain = html2text_rs.text_plain(resp.text)
print(text_plain)
```
### 3. text_rich()
```python
def text_rich(html: str, width: int = 100):
"""Convert HTML to rich text.
Args:
html (str): input html text.
width (int): wrap text to width columns. Default is 100.
"""
```
example:
```python
import html2text_rs
import requests

resp = requests.get("https://en.wikipedia.org/wiki/AGM-88_HARM")

text_rich = html2text_rs.text_rich(resp.text)
print(text_rich)
```
17 changes: 15 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use html2text::{from_read, from_read_with_decorator, render::text_renderer::TrivialDecorator};
use html2text::{
from_read, from_read_with_decorator,
render::text_renderer::{RichDecorator, TrivialDecorator},
};
use pyo3::prelude::*;

/// Convert HTML to markdown text
Expand All @@ -19,9 +22,19 @@ fn text_plain(html: String, width: usize, py: Python) -> PyResult<String> {
Ok(text)
}

/// Convert HTML to rich text
///
/// `html` is the input markup and `width` is the column count passed to the
/// renderer (100 when the caller omits it). Returns the rendered text.
#[pyfunction]
#[pyo3(signature=(html, width=100))]
fn text_rich(html: String, width: usize, py: Python) -> PyResult<String> {
    // The conversion is run through `allow_threads` so the interpreter lock
    // is not held for the duration of the rendering.
    let render = || from_read_with_decorator(html.as_bytes(), width, RichDecorator::new());
    Ok(py.allow_threads(render))
}

// Module initializer: adds each wrapped #[pyfunction] to the `html2text_rs` module.
#[pymodule]
fn html2text_rs(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
// NOTE(review): `text_plain` is added here and again two lines below — looks redundant; confirm.
m.add_function(wrap_pyfunction!(text_plain, m)?)?;
m.add_function(wrap_pyfunction!(text_markdown, m)?)?;
m.add_function(wrap_pyfunction!(text_plain, m)?)?;
m.add_function(wrap_pyfunction!(text_rich, m)?)?;
// All registrations succeeded; any failure above has already returned early via `?`.
Ok(())
}
13 changes: 10 additions & 3 deletions tests/test_html2text_rs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import pytest

import html2text_rs
import html2text_rs # type: ignore


def test_text_markdown():
Expand All @@ -19,3 +17,12 @@ def test_text_plain():
assert (
result == expected_output
), f"\nExpected:\n {expected_output} \nGot:\n {result}"


def test_text_rich():
    """text_rich() at width 80 turns an <h1> plus a <p> into the expected text."""
    expected_output = "# Hello World\n\nThis is a test.\n"
    result = html2text_rs.text_rich(
        "<h1>Hello World</h1><p>This is a test.</p>", width=80
    )
    # Show both strings on failure so whitespace differences are easy to spot.
    assert result == expected_output, f"\nExpected:\n {expected_output} \nGot:\n {result}"

0 comments on commit 6142560

Please sign in to comment.