test_extractor.py 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
# -*- coding: utf-8 -*-
# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
  4. from lxml import etree
  5. from openerp.tests.common import TransactionCase
  6. class ExtractorCase(TransactionCase):
  7. def setUp(self):
  8. super(ExtractorCase, self).setUp()
  9. # Shortcut
  10. self.text_from_html = self.env["ir.fields.converter"].text_from_html
  11. def test_excerpts(self):
  12. """Text gets correctly extracted."""
  13. html = u"""
  14. <html>
  15. <body>
  16. <div class="this should not appear">
  17. <h1>I'm a title</h1>
  18. <p>I'm a paragraph</p>
  19. <small>¡Pues yo soy español!</small>
  20. </div>
  21. </body>
  22. </html>
  23. """
  24. self.assertEqual(
  25. self.text_from_html(html),
  26. u"I'm a title I'm a paragraph ¡Pues yo soy español!")
  27. self.assertEqual(
  28. self.text_from_html(html, 8),
  29. u"I'm a title I'm a paragraph ¡Pues yo…")
  30. self.assertEqual(
  31. self.text_from_html(html, 8, 31),
  32. u"I'm a title I'm a paragraph ¡P…")
  33. self.assertEqual(
  34. self.text_from_html(html, 7, ellipsis=""),
  35. u"I'm a title I'm a paragraph ¡Pues")
  36. def test_empty_html(self):
  37. """Empty HTML handled correctly."""
  38. self.assertEqual(self.text_from_html(""), "")
  39. with self.assertRaises(etree.XMLSyntaxError):
  40. self.text_from_html("", fail=True)
  41. def test_false_html(self):
  42. """``False`` HTML handled correctly."""
  43. self.assertEqual(self.text_from_html(False), "")
  44. with self.assertRaises(TypeError):
  45. self.text_from_html(False, fail=True)
  46. def test_bad_html(self):
  47. """Bad HTML handled correctly."""
  48. self.assertEqual(self.text_from_html("<<bad>"), "")
  49. with self.assertRaises(etree.ParserError):
  50. self.text_from_html("<<bad>", fail=True)