From ffb6936d2acb1f5086d0443b3e17fc77ef9ec64e Mon Sep 17 00:00:00 2001 From: Lukasz Jachym Date: Wed, 9 Oct 2024 14:32:48 +0200 Subject: [PATCH 01/10] chore: switch to python version 3.11 --- .python-version | 2 +- .travis.yml | 3 ++- setup.py | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.python-version b/.python-version index 424e179..2c07333 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.6.8 +3.11 diff --git a/.travis.yml b/.travis.yml index 1ef825f..05a231f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,7 @@ language: python +dist: jammy python: - - "3.6" + - "3.11" # command to install dependencies install: - make dev diff --git a/setup.py b/setup.py index 6f1ddb5..40d77c4 100644 --- a/setup.py +++ b/setup.py @@ -32,6 +32,7 @@ def read(f): setup( name='content-validator', version=version, + python_requires='>=3.11', description=('Content validator looks at text content and preforms different validation tasks'), classifiers=[ 'License :: OSI Approved :: BSD License', 'Intended Audience :: Developers', 'Programming Language :: Python' From 7cc8c2cce00d0a5bd7ab84d75330aaf66db04fa8 Mon Sep 17 00:00:00 2001 From: Lukasz Jachym Date: Wed, 9 Oct 2024 14:50:15 +0200 Subject: [PATCH 02/10] chore: switch test runner to pytest --- Makefile | 14 +++++++++----- setup.py | 6 +++--- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index b16471c..4e7b15f 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,9 @@ PYTHON=venv/bin/python3 PIP=venv/bin/pip -NOSE=venv/bin/nosetests +COVERAGE=venv/bin/coverage +TEST_RUNNER=venv/bin/pytest +TEST_RUNNER_FLAGS=-s --durations=3 --durations-min=0.005 FLAKE=venv/bin/flake8 PYPICLOUD_HOST=pypicloud.getkeepsafe.local PIP_ARGS=--extra-index=http://$(PYPICLOUD_HOST)/simple/ --trusted-host $(PYPICLOUD_HOST) @@ -30,14 +32,16 @@ flake: $(FLAKE) validator tests test: flake - $(NOSE) -s $(FLAGS) + $(COVERAGE) run -m pytest $(TEST_RUNNER_FLAGS) vtest: - $(NOSE) -s -v $(FLAGS) + $(COVERAGE) run -m pytest -v $(TEST_RUNNER_FLAGS) + +testloop: + while sleep 1; do $(TEST_RUNNER) -s --lf $(TEST_RUNNER_FLAGS); done cov cover coverage: - $(NOSE) -s --with-cover --cover-html --cover-html-dir ./coverage $(FLAGS) - echo "open file://`pwd`/coverage/index.html" + $(COVERAGE) report -m clean: rm -rf `find . -name __pycache__` diff --git a/setup.py b/setup.py index 40d77c4..4ae8ac4 100644 --- a/setup.py +++ b/setup.py @@ -19,9 +19,9 @@ def read(f): ] tests_require = [ - 'nose', - 'flake8==3.6.0', - 'coverage', + 'pytest >= 8', + 'coverage==7.6.1', + 'flake8==7.1.1', ] devtools_require = [ From 3efae4d4dccf39b06f26884d4cbbab7a92377e41 Mon Sep 17 00:00:00 2001 From: Lukasz Jachym Date: Wed, 9 Oct 2024 18:43:52 +0200 Subject: [PATCH 03/10] refactors: using pyupgrade --- tests/utils.py | 1 - validator/__init__.py | 10 +++++----- validator/checks/__init__.py | 4 ++-- validator/checks/java.py | 2 +- validator/checks/md.py | 4 ++-- validator/checks/url.py | 24 ++++++++++++------------ validator/errors.py | 6 +++--- validator/fs.py | 4 ++-- validator/parsers.py | 12 ++++++------ validator/reports.py | 26 +++++++++++++------------- 10 files changed, 46 insertions(+), 47 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index 3948220..e3aa96c 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,4 +1,3 @@ - def read(path): with open(path) as fp: return fp.read() diff --git a/validator/__init__.py b/validator/__init__.py index c7301e4..42d7083 100644 --- a/validator/__init__.py +++ b/validator/__init__.py @@ -3,7 +3,7 @@ from . import parsers, checks, reports, fs -class Validator(object): +class Validator: def __init__(self, contents, parser, reader, check, reporter=None): self.contents = contents self.parser = parser @@ -24,7 +24,7 @@ async def async_validate(self): return errors -class ReportBuilder(object): +class ReportBuilder: def __init__(self, contents, parser, reader, check): self.contents = contents self.parser = parser @@ -49,7 +49,7 @@ def validate(self): return Validator(self.contents, self.parser, self.reader, self.check, reporter).validate() -class CheckBuilder(object): +class CheckBuilder: def __init__(self, contents, content_type, parser, reader): self.contents = contents self.content_type = content_type @@ -89,7 +89,7 @@ async def async_validate(self): return res -class ParserBuilder(object): +class ParserBuilder: def __init__(self, contents, reader=None): self.contents = contents self.content_type = 'txt' @@ -120,7 +120,7 @@ def check(self): return CheckBuilder(self.contents, self.content_type, parser, self.reader) -class ContentBuilder(object): +class ContentBuilder: def files(self, pattern, **kwargs): contents = fs.files(pattern, **kwargs) return ParserBuilder(contents, parsers.FileReader()) diff --git a/validator/checks/__init__.py b/validator/checks/__init__.py index 9dc692f..85bd1e8 100644 --- a/validator/checks/__init__.py +++ b/validator/checks/__init__.py @@ -21,7 +21,7 @@ def url_occurences(filetype): return UrlOccurenciesValidator() -def markdown(filetype, md_parser_cls: Type[MdParser] = MdParser): +def markdown(filetype, md_parser_cls: type[MdParser] = MdParser): if filetype not in ['txt', 'html']: raise UndefinedCheckTypeError('got filetype %s' % filetype) return MarkdownComparator(md_parser_cls) @@ -33,7 +33,7 @@ def java_args(filetype): return JavaComparator() -class ChainCheck(object): +class ChainCheck: def __init__(self, checks): self.checks = checks diff --git a/validator/checks/java.py b/validator/checks/java.py index 319ec2d..70cfd0d 100644 --- a/validator/checks/java.py +++ b/validator/checks/java.py @@ -6,7 +6,7 @@ REF_PATTERN = r'@string/\w+' -class JavaComparator(object): +class JavaComparator: def _get_args(self, content): return re.findall(ARG_PATTERN, content) diff --git a/validator/checks/md.py b/validator/checks/md.py index d8542a8..bb9ff9d 100644 --- a/validator/checks/md.py +++ b/validator/checks/md.py @@ -14,8 +14,8 @@ def save_file(content, filename): fp.write(content) -class MarkdownComparator(object): - def __init__(self, md_parser_cls: Type[MdParser] = MdParser): +class MarkdownComparator: + def __init__(self, md_parser_cls: type[MdParser] = MdParser): self._md_parser_cls = md_parser_cls def check(self, data, parser, reader): diff --git a/validator/checks/url.py b/validator/checks/url.py index dabd816..375899d 100644 --- a/validator/checks/url.py +++ b/validator/checks/url.py @@ -23,7 +23,7 @@ class MissingUrlExtractorError(Exception): # the job of extractors is to find all non-parametrized urls in the given text for later checks via UrlValidator # which examines is particular url leads to working webpage (200 status) # since we are interested in all urls (including parametrized) we need to sligthly change their API and behaviour -class TextUrlExtractor(object): +class TextUrlExtractor: def __init__(self, **kwargs): pass @@ -60,12 +60,12 @@ def _validate_email(self, email): return False def _extract_from_anchors(self, soup): - return set([a.get('href') or a.text for a in soup.find_all('a')]) + return {a.get('href') or a.text for a in soup.find_all('a')} def _extract_from_img(self, soup): if self.skip_images: return set() - return set([img.get('src') for img in soup.find_all('img')]) + return {img.get('src') for img in soup.find_all('img')} def _fix_url(self, url): result = '' @@ -82,7 +82,7 @@ def _fix_url(self, url): if re.match(self.url_pattern, full_url): result = full_url else: - logging.error('{} not tested'.format(url_parsed.geturl())) + logging.error(f'{url_parsed.geturl()} not tested') return result def extract_urls(self, content, keep_placeholders=False): @@ -96,20 +96,20 @@ def extract_urls(self, content, keep_placeholders=False): return result -class UrlStatusChecker(object): +class UrlStatusChecker: retry_max_count = 3 - def __init__(self, headers=None, exclude_urls_regexs: Optional[List[str]] = None): + def __init__(self, headers=None, exclude_urls_regexs: list[str] | None = None): self._exclude_urls_regex = exclude_urls_regexs or [] if self._exclude_urls_regex: - logging.warning('Excluded urls regexps: {}'.format(self._exclude_urls_regex)) + logging.warning(f'Excluded urls regexps: {self._exclude_urls_regex}') self._headers = headers or {} if 'User-Agent' not in self._headers: self._headers['User-Agent'] = DEFAULT_USER_AGENT async def _make_request(self, url): try: - logging.info('checking {}'.format(url)) + logging.info(f'checking {url}') async with aiohttp.request('get', url, headers=self._headers, allow_redirects=True) as res: return res.status except Exception: @@ -143,7 +143,7 @@ async def _check_urls_coro(self, urls, future): if not is_exluded: urls_without_excluded.append(url) else: - logging.warning('url {} excluded from status check'.format(url.url)) + logging.warning(f'url {url.url} excluded from status check') tasks = [self._request_status_code(url.url) for url in urls_without_excluded] results = await asyncio.gather(*tasks) for index, url in enumerate(urls_without_excluded): @@ -167,10 +167,10 @@ async def async_check(self, urls): return future.result() -class UrlValidator(object): +class UrlValidator: _extractors = {'txt': TextUrlExtractor, 'html': HtmlUrlExtractor} - def __init__(self, filetype, headers=None, exclude_status_check_regexs: Optional[List[str]] = None, **kwargs): + def __init__(self, filetype, headers=None, exclude_status_check_regexs: list[str] | None = None, **kwargs): self.client_headers = headers or {} self._excluded_status_check_regexs = exclude_status_check_regexs or [] extractor_class = self._extractors.get(filetype) @@ -179,7 +179,7 @@ def __init__(self, filetype, headers=None, exclude_status_check_regexs: Optional self.extractor = extractor_class(**kwargs) def _get_urls(self, data, parser, reader): - flat_data = set(p for sublist in data for p in sublist) + flat_data = {p for sublist in data for p in sublist} # TODO yield instead urls = {} for element in flat_data: diff --git a/validator/errors.py b/validator/errors.py index 082885a..9972f88 100644 --- a/validator/errors.py +++ b/validator/errors.py @@ -1,7 +1,7 @@ from collections import namedtuple -class UrlDiff(object): +class UrlDiff: def __init__(self, url, files=None, status_code=200, has_disallowed_chars=False): self.url = url @@ -10,7 +10,7 @@ def __init__(self, url, files=None, status_code=200, has_disallowed_chars=False) self.has_disallowed_chars = has_disallowed_chars def __str__(self): - return 'Url(%s, %s, %s, %s)' % (self.url, self.files, self.status_code, self.has_disallowed_chars) + return 'Url({}, {}, {}, {})'.format(self.url, self.files, self.status_code, self.has_disallowed_chars) def __repr__(self): return 'Url: %s' % self.url @@ -37,7 +37,7 @@ def is_valid(self): ContentData.__new__.__defaults__ = ('', ) * 2 -class MdDiff(object): +class MdDiff: def __init__(self, base, other, error_msgs): self.base = base diff --git a/validator/fs.py b/validator/fs.py index b87808d..6e12b16 100644 --- a/validator/fs.py +++ b/validator/fs.py @@ -91,10 +91,10 @@ def files(pattern, **kwargs): [[Path(path/to1/file1.txt), Path(path/to1/file2.txt)], [Path(path/to2/file1.txt), Path(path/to2/file2.txt)]] """ # extract named parameters from the pattern - params = set([p for p in map(lambda e: e[1], Formatter().parse(pattern)) if p]) + params = {p for p in map(lambda e: e[1], Formatter().parse(pattern)) if p} if params: if len(params - kwargs.keys()) > 0: - raise ValueError('missing parameters {} for pattern {}'.format(params - kwargs.keys(), pattern)) + raise ValueError(f'missing parameters {params - kwargs.keys()} for pattern {pattern}') return _params_pattern(pattern, params, **kwargs) else: return _no_params_pattern(pattern) diff --git a/validator/parsers.py b/validator/parsers.py index 379902b..3c87b89 100644 --- a/validator/parsers.py +++ b/validator/parsers.py @@ -9,22 +9,22 @@ def __init__(self, msg): super().__init__(msg) -class FileReader(object): +class FileReader: def read(self, path): return read_content(path) -class TxtReader(object): +class TxtReader: def read(self, content): return content -class MarkdownParser(object): +class MarkdownParser: def parse(self, content): return markdown.markdown(content) -class XmlParser(object): +class XmlParser: def __init__(self, query='*'): self.query = query @@ -38,12 +38,12 @@ def parse(self, content): return '\n\n'.join(texts) -class CsvParser(object): +class CsvParser: def parse(self, content): return '\n'.join(content.split(',')) -class ChainParser(object): +class ChainParser: def __init__(self, parsers): self.parsers = parsers diff --git a/validator/reports.py b/validator/reports.py index c052e62..a164f5b 100644 --- a/validator/reports.py +++ b/validator/reports.py @@ -6,7 +6,7 @@ from .errors import UrlDiff, MdDiff, UrlOccurencyDiff -class HtmlReporter(object): +class HtmlReporter: report_template = """ @@ -82,12 +82,12 @@ def __init__(self, output_directory='errors'): self.output_directory = output_directory def _add_content(self, soup, tag_id, content): - tags = soup.select('#{}'.format(tag_id)) + tags = soup.select(f'#{tag_id}') if tags and content: tags[0].append(content) else: - print('missing tag: %s, content %s' % (tag_id, content)) + print('missing tag: {}, content {}'.format(tag_id, content)) return soup # TODO just rewrite !!! @@ -99,7 +99,7 @@ def report(self, errors): # TODO use mustache for templates report_soup = BeautifulSoup(self.report_template, 'lxml') if isinstance(error, UrlDiff): - messages = ['{} returned with code {}'.format(error.url, error.status_code)] + messages = [f'{error.url} returned with code {error.status_code}'] self._add_content(report_soup, 'urls', '\n'.join(messages)) if isinstance(error, MdDiff): error_msgs = '
'.join(map(lambda i: str(i), error.error_msgs)) @@ -113,20 +113,20 @@ def report(self, errors): save_report(self.output_directory, error.other.original, report_soup.prettify()) -class ConsoleReporter(object): +class ConsoleReporter: def report(self, errors): for error in errors: if isinstance(error, UrlDiff): - print('{} returned with code {}'.format(error.url, error.status_code)) + print(f'{error.url} returned with code {error.status_code}') for path in error.files: - print('\t{}'.format(str(path))) + print(f'\t{str(path)}') print() if isinstance(error, MdDiff): - print('Files are different:\n\t{}\n\t{}\n\n'.format(str(error.base), str(error.other))) + print(f'Files are different:\n\t{str(error.base)}\n\t{str(error.other)}\n\n') -class StoreReporter(object): +class StoreReporter: def __init__(self): self.log = [] @@ -134,16 +134,16 @@ def __init__(self): def report(self, errors): for error in errors: if isinstance(error, UrlDiff): - self.log.append('%s returned with code %s for files' % (error.url, error.status_code)) + self.log.append('{} returned with code {} for files'.format(error.url, error.status_code)) for path in error.files: self.log.append('\t%s' % str(path)) if isinstance(error, MdDiff): - self.log.append('Files are different:\n\t%s\n\t%s\n\n' % (str(error.base), str(error.other))) + self.log.append('Files are different:\n\t{}\n\t{}\n\n'.format(str(error.base), str(error.other))) if isinstance(error, UrlOccurencyDiff): - self.log.append('Count of URLS in %s and %s are different' % (error.base_path, error.translation_path)) + self.log.append('Count of URLS in {} and {} are different'.format(error.base_path, error.translation_path)) -class ChainReporter(object): +class ChainReporter: def __init__(self, reporters): self.reporters = reporters From e29ebf73f7bfeab34c7e5c82f0800ecf076a022e Mon Sep 17 00:00:00 2001 From: Lukasz Jachym Date: Thu, 10 Oct 2024 13:57:16 +0200 Subject: [PATCH 04/10] fixes: dependencies versions to be compatible with aiohttp --- setup.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 4ae8ac4..4ce6532 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,6 @@ import os from setuptools import setup, find_packages - version = '0.7.2' @@ -11,11 +10,11 @@ def read(f): install_requires = [ 'sdiff @ git+https://github.com/KeepSafe/html-structure-diff.git@0.4.1#egg=sdiff', - 'aiohttp >=3, <3.4', + 'aiohttp==3.8.5', 'Markdown', 'parse <= 1.8.2', 'beautifulsoup4 >=4, <5', - 'lxml >=3', + 'lxml<5', ] tests_require = [ From 25abeecb1ad5a8d15ab7e5ca38e8c8c86d6ef3cf Mon Sep 17 00:00:00 2001 From: Lukasz Jachym Date: Thu, 10 Oct 2024 13:57:54 +0200 Subject: [PATCH 05/10] fixes: flake errors after running pyupgrade --- setup.py | 2 +- validator/checks/__init__.py | 2 -- validator/checks/md.py | 1 - validator/checks/url.py | 1 - validator/reports.py | 2 +- 5 files changed, 2 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index 4ce6532..ae79eb2 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ def read(f): install_requires = [ - 'sdiff @ git+https://github.com/KeepSafe/html-structure-diff.git@0.4.1#egg=sdiff', + 'sdiff @ git+https://github.com/KeepSafe/html-structure-diff.git@1.0.0#egg=sdiff', 'aiohttp==3.8.5', 'Markdown', 'parse <= 1.8.2', diff --git a/validator/checks/__init__.py b/validator/checks/__init__.py index 85bd1e8..61861f2 100644 --- a/validator/checks/__init__.py +++ b/validator/checks/__init__.py @@ -1,5 +1,3 @@ -from typing import Type - from sdiff import MdParser from .md import MarkdownComparator diff --git a/validator/checks/md.py b/validator/checks/md.py index bb9ff9d..70e6988 100644 --- a/validator/checks/md.py +++ b/validator/checks/md.py @@ -1,5 +1,4 @@ import re -from typing import Type from sdiff import diff, renderer, MdParser from markdown import markdown diff --git a/validator/checks/url.py b/validator/checks/url.py index 375899d..263a532 100644 --- a/validator/checks/url.py +++ b/validator/checks/url.py @@ -5,7 +5,6 @@ import string from bs4 import BeautifulSoup from urllib.parse import urlparse, urljoin -from typing import List, Optional from ..errors import UrlDiff, UrlOccurencyDiff diff --git a/validator/reports.py b/validator/reports.py index a164f5b..8100963 100644 --- a/validator/reports.py +++ b/validator/reports.py @@ -140,7 +140,7 @@ def report(self, errors): if isinstance(error, MdDiff): self.log.append('Files are different:\n\t{}\n\t{}\n\n'.format(str(error.base), str(error.other))) if isinstance(error, UrlOccurencyDiff): - self.log.append('Count of URLS in {} and {} are different'.format(error.base_path, error.translation_path)) + self.log.append(f'Count of URLS in {error.base_path} and {error.translation_path} are different') class ChainReporter: From 4e63ee78e7a3587bd34f3958aba7d24faabd220a Mon Sep 17 00:00:00 2001 From: Lukasz Jachym Date: Thu, 10 Oct 2024 14:44:07 +0200 Subject: [PATCH 06/10] chore: bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ae79eb2..bd913c5 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ import os from setuptools import setup, find_packages -version = '0.7.2' +version = '1.0.0' def read(f): From 2e861aaa3a2f6540116c56099339e1b07d12f897 Mon Sep 17 00:00:00 2001 From: Lukasz Jachym Date: Thu, 10 Oct 2024 20:39:15 +0200 Subject: [PATCH 07/10] adds: min coverage config --- .travis.yml | 1 + setup.cfg | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/.travis.yml b/.travis.yml index 05a231f..da12660 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,3 +8,4 @@ install: # command to run tests script: - make tests + - make coverage diff --git a/setup.cfg b/setup.cfg index 0945a29..15ae1fa 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,3 +7,9 @@ ignore = F403 [pep8] max-line-length = 120 + +[coverage:run] +branch = True + +[coverage:report] +fail_under = 96 From 922285b870866545909521a17448eee5e6197986 Mon Sep 17 00:00:00 2001 From: Philipp Berner <374326+philippb@users.noreply.github.com> Date: Tue, 13 Jan 2026 23:14:39 -0800 Subject: [PATCH 08/10] chore: bump aiohttp to 3.13.2 --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 4036e54..711d251 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,4 @@ Markdown html2text==2014.12.29 lxml==3.5 parse==1.8.2 -aiohttp==3.1.3 +aiohttp==3.13.2 diff --git a/setup.py b/setup.py index bd913c5..a9b0658 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ def read(f): install_requires = [ 'sdiff @ git+https://github.com/KeepSafe/html-structure-diff.git@1.0.0#egg=sdiff', - 'aiohttp==3.8.5', + 'aiohttp==3.13.2', 'Markdown', 'parse <= 1.8.2', 'beautifulsoup4 >=4, <5', From cf9ac6e3159b24e1a035352325a4cbc7ab75ac76 Mon Sep 17 00:00:00 2001 From: Philipp Berner <374326+philippb@users.noreply.github.com> Date: Tue, 13 Jan 2026 23:15:25 -0800 Subject: [PATCH 09/10] chore: bump lxml to 6.0.2 --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 711d251..2994c24 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ beautifulsoup4==4.4.1 Markdown html2text==2014.12.29 -lxml==3.5 +lxml==6.0.2 parse==1.8.2 aiohttp==3.13.2 diff --git a/setup.py b/setup.py index a9b0658..0fe9a7d 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ def read(f): 'Markdown', 'parse <= 1.8.2', 'beautifulsoup4 >=4, <5', - 'lxml<5', + 'lxml==6.0.2', ] tests_require = [ From 54953b50a4cebbc3255ab5f25d22092757e33017 Mon Sep 17 00:00:00 2001 From: Philipp Berner <374326+philippb@users.noreply.github.com> Date: Wed, 14 Jan 2026 08:07:28 -0800 Subject: [PATCH 10/10] chore: add CI and modernize test deps --- .github/workflows/ci.yml | 33 +++++++++++++++++++++++++++++++++ .python-version | 2 +- .travis.yml | 11 ----------- requirements-dev.txt | 6 +++--- requirements.txt | 3 ++- tests/test_parser.py | 17 +++++++++++++++++ tests/test_reports.py | 31 +++++++++++++++++++++++++++++++ validator/reports.py | 10 +++++++++- 8 files changed, 96 insertions(+), 17 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 .travis.yml create mode 100644 tests/test_reports.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..34c0be6 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,33 @@ +name: CI + +on: + push: + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version-file: .python-version + + - name: Configure git for private deps + run: | + git config --global url."https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/".insteadOf "https://github.com/" + + - name: Install dependencies + run: | + python -m pip install -U pip + python -m pip install -r requirements-dev.txt + + - name: Lint + run: | + flake8 validator tests + + - name: Test + run: | + pytest -q diff --git a/.python-version b/.python-version index 2c07333..763b626 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.11 +3.12.12 diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index da12660..0000000 --- a/.travis.yml +++ /dev/null @@ -1,11 +0,0 @@ -language: python -dist: jammy -python: - - "3.11" -# command to install dependencies -install: - - make dev -# command to run tests -script: - - make tests - - make coverage diff --git a/requirements-dev.txt b/requirements-dev.txt index d01e4c4..7f1b96a 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,4 @@ -r requirements.txt -flake8==3.6.0 -nose -coverage +flake8==7.1.1 +pytest>=8 +coverage==7.6.1 diff --git a/requirements.txt b/requirements.txt index 2994c24..ae1e4e7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ -beautifulsoup4==4.4.1 +beautifulsoup4==4.14.3 +sdiff @ git+https://github.com/KeepSafe/html-structure-diff.git@1.0.0#egg=sdiff Markdown html2text==2014.12.29 lxml==6.0.2 diff --git a/tests/test_parser.py b/tests/test_parser.py index 0e2104d..0116e2b 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,5 +1,7 @@ from unittest import TestCase +import pytest + from validator import parsers from tests.utils import read @@ -10,3 +12,18 @@ def test_xml_parsing(self): content = read('tests/fixtures/bugs/parser_bug.xml') parser = parsers.XmlParser() parser.parse(content) + + +def test_xml_parser_empty_returns_empty(): + parser = parsers.XmlParser() + + assert parser.parse(' ') == '' + + +def test_chain_parser_wraps_errors(): + parser = parsers.ChainParser([parsers.XmlParser()]) + + with pytest.raises(parsers.ParserError) as exc: + parser.parse('') + + assert 'error in content' in str(exc.value) diff --git a/tests/test_reports.py b/tests/test_reports.py new file mode 100644 index 0000000..4075ad7 --- /dev/null +++ b/tests/test_reports.py @@ -0,0 +1,31 @@ +from validator.errors import ContentData, MdDiff, UrlDiff +from validator.reports import HtmlReporter + + +def test_html_reporter_handles_url_diff(tmp_path): + reporter = HtmlReporter(output_directory=str(tmp_path)) + error = UrlDiff('http://example.com', files=['errors/source.md'], status_code=404) + + reporter.report([error]) + + report_path = tmp_path / 'errors' / 'source.html' + assert report_path.exists() + assert 'http://example.com returned with code 404' in report_path.read_text() + + +def test_html_reporter_writes_markdown_diff(tmp_path): + reporter = HtmlReporter(output_directory=str(tmp_path)) + base = ContentData('base.md', 'Base', 'Base', '

Base

') + other = ContentData('other.md', 'Other', 'Other', '

Other

') + error = MdDiff(base, other, ['Missing content']) + + reporter.report([error]) + + report_path = tmp_path / 'other.html' + assert report_path.exists() + content = report_path.read_text() + assert 'Missing content' in content + assert '' in content + assert '' in content + assert 'Base' in content + assert 'Other' in content diff --git a/validator/reports.py b/validator/reports.py index 8100963..e896dee 100644 --- a/validator/reports.py +++ b/validator/reports.py @@ -1,3 +1,5 @@ +from pathlib import Path + from bs4 import BeautifulSoup import shutil import markdown @@ -94,13 +96,17 @@ def _add_content(self, soup, tag_id, content): # TODO remove isinstance def report(self, errors): shutil.rmtree(self.output_directory, ignore_errors=True) + if self.output_directory: + Path(self.output_directory).mkdir(parents=True, exist_ok=True) for error in errors: # TODO save to different files for links and diff # TODO use mustache for templates report_soup = BeautifulSoup(self.report_template, 'lxml') + source_path = None if isinstance(error, UrlDiff): messages = [f'{error.url} returned with code {error.status_code}'] self._add_content(report_soup, 'urls', '\n'.join(messages)) + source_path = error.files[0] if error.files else 'url_errors' if isinstance(error, MdDiff): error_msgs = '
'.join(map(lambda i: str(i), error.error_msgs)) base = markdown.markdown(error.base.parsed) @@ -110,7 +116,9 @@ def report(self, errors): report_soup = self._add_content(report_soup, 'left_diff', BeautifulSoup(error.base.diff, 'lxml').body) report_soup = self._add_content(report_soup, 'right_diff', BeautifulSoup(error.other.diff, 'lxml').body) report_soup = self._add_content(report_soup, 'error_msgs', BeautifulSoup(error_msgs, 'lxml').body) - save_report(self.output_directory, error.other.original, report_soup.prettify()) + source_path = error.other.original + if source_path is not None: + save_report(self.output_directory, source_path, report_soup.prettify()) class ConsoleReporter: