9 سال پیش · 55f0b65775
--- a/README.rst
+++ b/README.rst
@@ -0,0 +1,82 @@
 
				+.. image:: https://img.shields.io/badge/licence-AGPL--3-blue.svg
			
 
				+   :target: http://www.gnu.org/licenses/agpl-3.0-standalone.html
			
 
				+   :alt: License: AGPL-3
			
 
				+
			
 
				+====================
			
 
				+Text from HTML field
			
 
				+====================
			
 
				+
			
 
				+This module provides some technical features that let you extract text from
			
 
				+any chunk of HTML, without HTML tags or attributes. You can choose either:
			
 
				+
			
 
				+* To truncate the result by amount of words or characters.
			
 
				+* To append an ellipsis (or any character(s)) at the end of the result.
			
 
				+
			
 
				+It can be used to easily generate excerpts.
			
 
				+
			
 
				+Usage
			
 
				+=====
			
 
				+
			
 
				+This module just adds a technical utility, but nothing for the end user.
			
 
				+
			
 
				+If you are a developer and need this utility for your module, see these
			
 
				+examples and read the docs inside the code.
			
 
				+
			
 
				+Python example::
			
 
				+
			
 
				+    @api.multi
			
 
				+    def some_method(self):
			
 
				+        # Get truncated text from an HTML field. It will have 40 words and 100
			
 
				+        # characters at most, and will have "..." appended at the end if it
			
 
				+        # gets truncated.
			
 
				+        truncated_text = self.env["ir.fields.converter"].text_from_html(
			
 
				+            self.html_field, 40, 100, "...")
			
 
				+
			
 
				+QWeb example::
			
 
				+
			
 
				+    <t t-esc="env['ir.fields.converter'].text_from_html(doc.html_field)"/>
			
 
				+
			
 
				+.. image:: https://odoo-community.org/website/image/ir.attachment/5784_f2813bd/datas
			
 
				+   :alt: Try me on Runbot
			
 
				+   :target: https://runbot.odoo-community.org/runbot/149/8.0
			
 
				+
			
 
				+Known issues / Roadmap
			
 
				+======================
			
 
				+
			
 
				+* An option could be added to try to respect the basic HTML tags inside the
			
 
				+  excerpt (``<b>``, ``<i>``, ``<p>``, etc.).
			
 
				+
			
 
				+Bug Tracker
			
 
				+===========
			
 
				+
			
 
				+Bugs are tracked on `GitHub Issues
			
 
				+<https://github.com/OCA/server-tools/issues>`_. In case of trouble, please
			
 
				+check there if your issue has already been reported. If you spotted it first,
			
 
				+help us smash it by providing detailed and welcome `feedback
			
 
				+<https://github.com/OCA/
			
 
				+server-tools/issues/new?body=module:%20
			
 
				+html_text%0Aversion:%20
			
 
				+8.0%0A%0A**Steps%20to%20reproduce**%0A-%20...%0A%0A**Current%20behavior**%0A%0A**Expected%20behavior**>`_.
			
 
				+
			
 
				+Credits
			
 
				+=======
			
 
				+
			
 
				+Contributors
			
 
				+------------
			
 
				+
			
 
				+* Jairo Llopis <[email protected]>
			
 
				+
			
 
				+Maintainer
			
 
				+----------
			
 
				+
			
 
				+.. image:: https://odoo-community.org/logo.png
			
 
				+   :alt: Odoo Community Association
			
 
				+   :target: https://odoo-community.org
			
 
				+
			
 
				+This module is maintained by the OCA.
			
 
				+
			
 
				+OCA, or the Odoo Community Association, is a nonprofit organization whose
			
 
				+mission is to support the collaborative development of Odoo features and
			
 
				+promote its widespread use.
			
 
				+
			
 
				+To contribute to this module, please visit https://odoo-community.org.
			
--- a/__init__.py
+++ b/__init__.py
@@ -0,0 +1,5 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis
			
 
				+# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
			
 
				+
			
 
				+from . import models
			
--- a/__openerp__.py
+++ b/__openerp__.py
@@ -0,0 +1,23 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis
			
 
				+# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
			
 
				+# Addon manifest: one dict literal holding this module's metadata.
			
 
				+{
			
 
				+    "name": "Text from HTML field",
			
 
				+    "summary": "Generate excerpts from any HTML field",
			
 
				+    "version": "8.0.1.0.0",
			
 
				+    "category": "Tools",
			
 
				+    "website": "https://grupoesoc.es",
			
 
				+    # Adjacent string literals are concatenated into a single author value.
			
 
				+    "author": "Grupo ESOC Ingeniería de Servicios, "
			
 
				+              "Odoo Community Association (OCA)",
			
 
				+    "license": "AGPL-3",
			
 
				+    # Per the README, this is a technical utility with nothing for end users.
			
 
				+    "application": False,
			
 
				+    "installable": True,
			
 
				+    # models/ir_fields_converter.py imports ``etree`` and ``html`` from lxml.
			
 
				+    "external_dependencies": {
			
 
				+        "python": [
			
 
				+            "lxml.html",
			
 
				+        ],
			
 
				+    },
			
 
				+    # NOTE(review): presumably "base" provides the inherited
			
 
				+    # ``ir.fields.converter`` model - confirm.
			
 
				+    "depends": [
			
 
				+        "base",
			
 
				+    ],
			
 
				+}
			
--- a/models/__init__.py
+++ b/models/__init__.py
@@ -0,0 +1,5 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis
			
 
				+# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
			
 
				+
			
 
				+from . import ir_fields_converter
			
--- a/models/ir_fields_converter.py
+++ b/models/ir_fields_converter.py
@@ -0,0 +1,72 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis
			
 
				+# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
			
 
				+
			
 
				+import logging
			
 
				+from lxml import etree, html
			
 
				+from openerp import api, models
			
 
				+
			
 
				+_logger = logging.getLogger(__name__)
			
 
				+
			
 
				+
			
 
				+class IrFieldsConverter(models.Model):
			
 
				+    _inherit = "ir.fields.converter"
			
 
				+
			
 
				+    @api.model
			
 
				+    def text_from_html(self, html_content, max_words=None, max_chars=None,
			
 
				+                       ellipsis=u"…", fail=False):
			
 
				+        """Extract text from an HTML field in a generator.
			
 
				+
			
 
				+        :param str html_content:
			
 
				+            HTML contents from where to extract the text.
			
 
				+
			
 
				+        :param int max_words:
			
 
				+            Maximum amount of words allowed in the resulting string.
			
 
				+
			
 
				+        :param int max_chars:
			
 
				+            Maximum amount of characters allowed in the resulting string. If
			
 
				+            you apply this limit, beware that the last word could get cut in an
			
 
				+            unexpected place.
			
 
				+
			
 
				+        :param str ellipsis:
			
 
				+            Character(s) to be appended to the end of the resulting string if
			
 
				+            it gets truncated after applying limits set in :param:`max_words`
			
 
				+            or :param:`max_chars`. If you want nothing applied, just set an
			
 
				+            empty string.
			
 
				+
			
 
				+        :param bool fail:
			
 
				+            If ``True``, exceptions will be raised. Otherwise, an empty string
			
 
				+            will be returned on failure.
			
 
				+        """
			
 
				+        # Parse HTML
			
 
				+        try:
			
 
				+            doc = html.fromstring(html_content)
			
 
				+        except (TypeError, etree.XMLSyntaxError, etree.ParserError):
			
 
				+            if fail:
			
 
				+                raise
			
 
				+            else:
			
 
				+                _logger.exception("Failure parsing this HTML:\n%s",
			
 
				+                                  html_content)
			
 
				+                return ""
			
 
				+
			
 
				+        # Get words
			
 
				+        words = u"".join(doc.xpath("//text()")).split()
			
 
				+
			
 
				+        # Truncate words
			
 
				+        suffix = max_words and len(words) > max_words
			
 
				+        if max_words:
			
 
				+            words = words[:max_words]
			
 
				+
			
 
				+        # Get text
			
 
				+        text = u" ".join(words)
			
 
				+
			
 
				+        # Truncate text
			
 
				+        suffix = suffix or max_chars and len(text) > max_chars
			
 
				+        if max_chars:
			
 
				+            text = text[:max_chars - (len(ellipsis) if suffix else 0)].strip()
			
 
				+
			
 
				+        # Append ellipsis if needed
			
 
				+        if suffix:
			
 
				+            text += ellipsis
			
 
				+
			
 
				+        return text
			
--- a/static/description/icon.png
+++ b/static/description/icon.png
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -0,0 +1,5 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis
			
 
				+# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
			
 
				+
			
 
				+from . import test_extractor
			
--- a/tests/test_extractor.py
+++ b/tests/test_extractor.py
@@ -0,0 +1,59 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis
			
 
				+# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
			
 
				+
			
 
				+from lxml import etree
			
 
				+from openerp.tests.common import TransactionCase
			
 
				+
			
 
				+
			
 
				+class ExtractorCase(TransactionCase):
			
 
				+    def setUp(self):
			
 
				+        super(ExtractorCase, self).setUp()
			
 
				+
			
 
				+        # Shortcut
			
 
				+        self.text_from_html = self.env["ir.fields.converter"].text_from_html
			
 
				+
			
 
				+    def test_excerpts(self):
			
 
				+        """Text gets correctly extracted."""
			
 
				+        html = u"""
			
 
				+            <html>
			
 
				+                <body>
			
 
				+                    <div class="this should not appear">
			
 
				+                        <h1>I'm a title</h1>
			
 
				+                        <p>I'm a paragraph</p>
			
 
				+                        <small>¡Pues yo soy español!</small>
			
 
				+                    </div>
			
 
				+                </body>
			
 
				+            </html>
			
 
				+            """
			
 
				+
			
 
				+        self.assertEqual(
			
 
				+            self.text_from_html(html),
			
 
				+            u"I'm a title I'm a paragraph ¡Pues yo soy español!")
			
 
				+        self.assertEqual(
			
 
				+            self.text_from_html(html, 8),
			
 
				+            u"I'm a title I'm a paragraph ¡Pues yo…")
			
 
				+        self.assertEqual(
			
 
				+            self.text_from_html(html, 8, 31),
			
 
				+            u"I'm a title I'm a paragraph ¡P…")
			
 
				+        self.assertEqual(
			
 
				+            self.text_from_html(html, 7, ellipsis=""),
			
 
				+            u"I'm a title I'm a paragraph ¡Pues")
			
 
				+
			
 
				+    def test_empty_html(self):
			
 
				+        """Empty HTML handled correctly."""
			
 
				+        self.assertEqual(self.text_from_html(""), "")
			
 
				+        with self.assertRaises(etree.XMLSyntaxError):
			
 
				+            self.text_from_html("", fail=True)
			
 
				+
			
 
				+    def test_false_html(self):
			
 
				+        """``False`` HTML handled correctly."""
			
 
				+        self.assertEqual(self.text_from_html(False), "")
			
 
				+        with self.assertRaises(TypeError):
			
 
				+            self.text_from_html(False, fail=True)
			
 
				+
			
 
				+    def test_bad_html(self):
			
 
				+        """Bad HTML handled correctly."""
			
 
				+        self.assertEqual(self.text_from_html("<<bad>"), "")
			
 
				+        with self.assertRaises(etree.ParserError):
			
 
				+            self.text_from_html("<<bad>", fail=True)