hai 8 anos · 336a0851bb
--- a/README.rst
+++ b/README.rst
@@ -0,0 +1,81 @@
 
				+.. image:: https://img.shields.io/badge/licence-AGPL--3-blue.svg
			
 
				+   :target: http://www.gnu.org/licenses/agpl-3.0-standalone.html
			
 
				+   :alt: License: AGPL-3
			
 
				+
			
 
				+==========================
			
 
				+Image URLs from HTML field
			
 
				+==========================
			
 
				+
			
 
				+This module includes a method that extracts image URLs from any chunk of HTML,
			
 
				+in order of appearance.
			
 
				+
			
 
				+Usage
			
 
				+=====
			
 
				+
			
 
				+This module just adds a technical utility, but nothing for the end user.
			
 
				+
			
 
				+If you are a developer and need this utility for your module, see these
			
 
				+examples and read the docs inside the code.
			
 
				+
			
 
				+Python example::
			
 
				+
			
 
				+    @api.multi
			
 
				+    def some_method(self):
			
 
				+        # Get images from an HTML field
			
 
				+        imgs = self.env["ir.fields.converter"].imgs_from_html(self.html_field)
			
 
				+        for url in imgs:
			
 
				+            # Do stuff with those URLs
			
 
				+            pass
			
 
				+
			
 
				+QWeb example::
			
 
				+
			
 
				+    <!-- Extract first image from a blog post -->
			
 
				+    <t t-foreach="env['ir.fields.converter']
			
 
				+                  .imgs_from_html(blog_post.content, 1)"
			
 
				+       t-as="url">
			
 
				+        <img t-att-src="url"/>
			
 
				+    </t>
			
 
				+
			
 
				+.. image:: https://odoo-community.org/website/image/ir.attachment/5784_f2813bd/datas
			
 
				+   :alt: Try me on Runbot
			
 
				+   :target: https://runbot.odoo-community.org/runbot/149/8.0
			
 
				+
			
 
				+Known issues / Roadmap
			
 
				+======================
			
 
				+
			
 
				+* The regexp to find the URL could be better.
			
 
				+
			
 
				+Bug Tracker
			
 
				+===========
			
 
				+
			
 
				+Bugs are tracked on `GitHub Issues
			
 
				+<https://github.com/OCA/server-tools/issues>`_. In case of trouble, please
			
 
				+check there if your issue has already been reported. If you spotted it first,
			
 
				+help us smash it by providing detailed and welcome `feedback
			
 
				+<https://github.com/OCA/
			
 
				+server-tools/issues/new?body=module:%20
			
 
				+html_image_url_extractor%0Aversion:%20
			
 
				+8.0%0A%0A**Steps%20to%20reproduce**%0A-%20...%0A%0A**Current%20behavior**%0A%0A**Expected%20behavior**>`_.
			
 
				+
			
 
				+Credits
			
 
				+=======
			
 
				+
			
 
				+Contributors
			
 
				+------------
			
 
				+
			
 
				+* Jairo Llopis <yajo.sk8@gmail.com>
			
 
				+
			
 
				+Maintainer
			
 
				+----------
			
 
				+
			
 
				+.. image:: https://odoo-community.org/logo.png
			
 
				+   :alt: Odoo Community Association
			
 
				+   :target: https://odoo-community.org
			
 
				+
			
 
				+This module is maintained by the OCA.
			
 
				+
			
 
				+OCA, or the Odoo Community Association, is a nonprofit organization whose
			
 
				+mission is to support the collaborative development of Odoo features and
			
 
				+promote its widespread use.
			
 
				+
			
 
				+To contribute to this module, please visit https://odoo-community.org.
			
--- a/__init__.py
+++ b/__init__.py
@@ -0,0 +1,5 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis
			
 
				+# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
			
 
				+
			
 
				+from . import models
			
--- a/__openerp__.py
+++ b/__openerp__.py
@@ -0,0 +1,23 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis
			
 
				+# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
			
 
				+{
			
 
				+    "name": "Image URLs from HTML field",
			
 
				+    "summary": "Extract images found in any HTML field",
			
 
				+    "version": "8.0.1.0.0",
			
 
				+    "category": "Tools",
			
 
				+    "website": "https://grupoesoc.es",
			
 
				+    "author": "Grupo ESOC Ingeniería de Servicios, "
			
 
				+              "Odoo Community Association (OCA)",
			
 
				+    "license": "AGPL-3",
			
 
				+    "application": False,
			
 
				+    "installable": True,
			
 
				+    "external_dependencies": {
			
 
				+        "python": [
			
 
				+            "lxml.html",
			
 
				+        ],
			
 
				+    },
			
 
				+    "depends": [
			
 
				+        "base",
			
 
				+    ],
			
 
				+}
			
--- a/models/__init__.py
+++ b/models/__init__.py
@@ -0,0 +1,5 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis
			
 
				+# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
			
 
				+
			
 
				+from . import ir_fields_converter
			
--- a/models/ir_fields_converter.py
+++ b/models/ir_fields_converter.py
@@ -0,0 +1,71 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis
			
 
				+# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
			
 
				+
			
 
				+import re
			
 
				+import logging
			
 
				+from lxml import etree, html
			
 
				+from openerp import api, models
			
 
				+
			
 
				+_logger = logging.getLogger(__name__)
			
 
				+
			
 
				+
			
 
				+class IrFieldsConverter(models.Model):
			
 
				+    _inherit = "ir.fields.converter"
			
 
				+
			
 
				+    @api.model
			
 
				+    def imgs_from_html(self, html_content, limit=None, fail=False):
			
 
				+        """Extract all images in order from an HTML field in a generator.
			
 
				+
			
 
				+        :param str html_content:
			
 
				+            HTML contents from where to extract the images.
			
 
				+
			
 
				+        :param int limit:
			
 
				+            Only get up to this number of images.
			
 
				+
			
 
				+        :param bool fail:
			
 
				+            If ``True``, exceptions will be raised.
			
 
				+        """
			
 
				+        # Parse HTML
			
 
				+        try:
			
 
				+            doc = html.fromstring(html_content)
			
 
				+        except (TypeError, etree.XMLSyntaxError, etree.ParserError):
			
 
				+            if fail:
			
 
				+                raise
			
 
				+            else:
			
 
				+                _logger.exception("Failure parsing this HTML:\n%s",
			
 
				+                                  html_content)
			
 
				+                return
			
 
				+
			
 
				+        # Required tools
			
 
				+        query = """
			
 
				+            //img[@src] |
			
 
				+            //*[contains(translate(@style, "BACKGROUND", "background"),
			
 
				+                         'background')]
			
 
				+               [contains(translate(@style, "URL", "url"), 'url(')]
			
 
				+        """
			
 
				+        rgx = r"""
			
 
				+            url\(\s*        # Start function
			
 
				+            (?P<url>[^)]*)  # URL string
			
 
				+            \s*\)           # End function
			
 
				+        """
			
 
				+        rgx = re.compile(rgx, re.IGNORECASE | re.VERBOSE)
			
 
				+
			
 
				+        # Loop through possible image URLs
			
 
				+        for lap, element in enumerate(doc.xpath(query)):
			
 
				+            if limit and lap >= limit:
			
 
				+                break
			
 
				+            if element.tag == "img":
			
 
				+                yield element.attrib["src"]
			
 
				+            else:
			
 
				+                for rule in element.attrib["style"].split(";"):
			
 
				+                    # Extract background image
			
 
				+                    parts = rule.split(":", 1)
			
 
				+                    try:
			
 
				+                        if parts[0].strip().lower() in {"background",
			
 
				+                                                        "background-image"}:
			
 
				+                            yield (rgx.search(parts[1])
			
 
				+                                   .group("url").strip("\"'"))
			
 
				+                    # Malformed CSS or no match for URL
			
 
				+                    except (IndexError, AttributeError):
			
 
				+                        pass
			
--- a/static/description/icon.png
+++ b/static/description/icon.png
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -0,0 +1,5 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis
			
 
				+# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
			
 
				+
			
 
				+from . import test_extractor
			
--- a/tests/test_extractor.py
+++ b/tests/test_extractor.py
@@ -0,0 +1,69 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis
			
 
				+# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
			
 
				+
			
 
				+from lxml import etree
			
 
				+from openerp.tests.common import TransactionCase
			
 
				+
			
 
				+
			
 
				+class ExtractorCase(TransactionCase):
			
 
				+    def setUp(self):
			
 
				+        super(ExtractorCase, self).setUp()
			
 
				+
			
 
				+        # Shortcut
			
 
				+        self.imgs_from_html = self.env["ir.fields.converter"].imgs_from_html
			
 
				+
			
 
				+    def test_mixed_images_found(self):
			
 
				+        """Images correctly found in <img> elements and backgrounds."""
			
 
				+        content = u"""
			
 
				+            <div>
			
 
				+                <!-- src-less img -->
			
 
				+                <img/>
			
 
				+                <p/>
			
 
				+                <img src="/path/0"/>
			
 
				+                <img src="/path/1"/>
			
 
				+                <img src="/path/2"/>
			
 
				+                <img src="/path/3"/>
			
 
				+                <section style="background : URL('/path/4');;background;ö;">
			
 
				+                    <div style='BACKGROUND-IMAGE:url(/path/5)'>
			
 
				+                        <p style="background:uRl(&quot;/path/6&quot;)">
			
 
				+                            <img src="/path/7"/>
			
 
				+                        </p>
			
 
				+                    </div>
			
 
				+                </section>
			
 
				+            </div>
			
 
				+            """
			
 
				+
			
 
				+        # Read all images
			
 
				+        for n, url in enumerate(self.imgs_from_html(content)):
			
 
				+            self.assertEqual("/path/%d" % n, url)
			
 
				+        self.assertEqual(n, 7)
			
 
				+
			
 
				+        # Read only first image
			
 
				+        for n, url in enumerate(self.imgs_from_html(content, 1)):
			
 
				+            self.assertEqual("/path/%d" % n, url)
			
 
				+        self.assertEqual(n, 0)
			
 
				+
			
 
				+    def test_empty_html(self):
			
 
				+        """Empty HTML handled correctly."""
			
 
				+        for laps, text in self.imgs_from_html(""):
			
 
				+            self.assertTrue(False)  # You should never get here
			
 
				+
			
 
				+        with self.assertRaises(etree.XMLSyntaxError):
			
 
				+            list(self.imgs_from_html("", fail=True))
			
 
				+
			
 
				+    def test_false_html(self):
			
 
				+        """``False`` HTML handled correctly."""
			
 
				+        for laps, text in self.imgs_from_html(False):
			
 
				+            self.assertTrue(False)  # You should never get here
			
 
				+
			
 
				+        with self.assertRaises(TypeError):
			
 
				+            list(self.imgs_from_html(False, fail=True))
			
 
				+
			
 
				+    def test_bad_html(self):
			
 
				+        """Bad HTML handled correctly."""
			
 
				+        for laps, text in self.imgs_from_html("<<bad>"):
			
 
				+            self.assertTrue(False)  # You should never get here
			
 
				+
			
 
				+        with self.assertRaises(etree.ParserError):
			
 
				+            list(self.imgs_from_html("<<bad>", fail=True))