diff --git a/vulnerabilities/importers/apache_httpd.py b/vulnerabilities/importers/apache_httpd.py index 75099ab8f..53d3ba023 100644 --- a/vulnerabilities/importers/apache_httpd.py +++ b/vulnerabilities/importers/apache_httpd.py @@ -26,6 +26,7 @@ from vulnerabilities.severity_systems import APACHE_HTTPD from vulnerabilities.utils import create_weaknesses_list from vulnerabilities.utils import cwe_regex +from vulnerabilities.utils import get_http_headers from vulnerabilities.utils import get_item logger = logging.getLogger(__name__) @@ -41,7 +42,7 @@ class ApacheHTTPDImporter(Importer): def advisory_data(self): links = fetch_links(self.base_url) for link in links: - data = requests.get(link).json() + data = requests.get(link, headers=get_http_headers()).json() yield self.to_advisory(data) def to_advisory(self, data): @@ -150,7 +151,7 @@ def to_version_ranges(self, versions_data, fixed_versions): def fetch_links(url): links = [] - data = requests.get(url).content + data = requests.get(url, headers=get_http_headers()).content soup = BeautifulSoup(data, features="lxml") for tag in soup.find_all("a"): link = tag.get("href") diff --git a/vulnerabilities/importers/debian_oval.py b/vulnerabilities/importers/debian_oval.py index f5a747a11..8f1129e31 100644 --- a/vulnerabilities/importers/debian_oval.py +++ b/vulnerabilities/importers/debian_oval.py @@ -14,6 +14,7 @@ import requests from vulnerabilities.importer import OvalImporter +from vulnerabilities.utils import get_http_headers class DebianOvalImporter(OvalImporter): @@ -68,7 +69,7 @@ def _fetch(self): for release in releases: file_url = f"https://www.debian.org/security/oval/oval-definitions-{release}.xml.bz2" self.data_url = file_url - resp = requests.get(file_url).content + resp = requests.get(file_url, headers=get_http_headers()).content extracted = bz2.decompress(resp) yield ( {"type": "deb", "namespace": "debian", "qualifiers": {"distro": release}}, diff --git a/vulnerabilities/importers/openssl.py b/vulnerabilities/importers/openssl.py index b71206418..2b59d1ca7 100644 --- a/vulnerabilities/importers/openssl.py +++ b/vulnerabilities/importers/openssl.py @@ -25,6 +25,7 @@ from vulnerabilities.importer import Reference from vulnerabilities.importer import VulnerabilitySeverity from vulnerabilities.severity_systems import SCORING_SYSTEMS +from vulnerabilities.utils import get_http_headers logger = logging.getLogger(__name__) @@ -36,7 +37,7 @@ class OpensslImporter(Importer): importer_name = "OpenSSL Importer" def fetch(self): - response = requests.get(url=self.url) + response = requests.get(url=self.url, headers=get_http_headers()) if not response.status_code == 200: logger.error(f"Error while fetching {self.url}: {response.status_code}") return diff --git a/vulnerabilities/importers/suse_backports.py b/vulnerabilities/importers/suse_backports.py index e7863e7e7..1d912b607 100644 --- a/vulnerabilities/importers/suse_backports.py +++ b/vulnerabilities/importers/suse_backports.py @@ -15,12 +15,13 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import Importer from vulnerabilities.utils import create_etag +from vulnerabilities.utils import get_http_headers class SUSEBackportsImporter(Importer): @staticmethod def get_all_urls_of_backports(url): - r = requests.get(url) + r = requests.get(url, headers=get_http_headers()) soup = BeautifulSoup(r.content, "lxml") for a_tag in soup.find_all("a", href=True): if a_tag["href"].endswith(".yaml") and a_tag["href"].startswith("backports"): @@ -38,7 +39,7 @@ def updated_advisories(self): def _fetch_yaml(self, url): try: - resp = requests.get(url) + resp = requests.get(url, headers=get_http_headers()) resp.raise_for_status() return saneyaml.load(resp.content) diff --git a/vulnerabilities/importers/suse_oval.py b/vulnerabilities/importers/suse_oval.py index 0722682f7..d511f33d3 100644 --- a/vulnerabilities/importers/suse_oval.py +++ b/vulnerabilities/importers/suse_oval.py @@ -15,6 +15,7 @@ from bs4 import BeautifulSoup from vulnerabilities.importer import OvalImporter +from vulnerabilities.utils import get_http_headers class SuseOvalImporter(OvalImporter): @@ -27,7 +28,7 @@ def __init__(self, *args, **kwargs): self.translations = {"less than": "<", "equals": "=", "greater than or equal": ">="} def _fetch(self): - page = requests.get(self.base_url).text + page = requests.get(self.base_url, headers=get_http_headers()).text soup = BeautifulSoup(page, "lxml") suse_oval_files = [ @@ -37,7 +38,7 @@ def _fetch(self): ] for suse_file in filter(suse_oval_files): - response = requests.get(suse_file) + response = requests.get(suse_file, headers=get_http_headers()) extracted = gzip.decompress(response.content) yield ( diff --git a/vulnerabilities/importers/ubuntu.py b/vulnerabilities/importers/ubuntu.py index e47515b93..78d86406b 100644 --- a/vulnerabilities/importers/ubuntu.py +++ b/vulnerabilities/importers/ubuntu.py @@ -14,6 +14,7 @@ import requests from vulnerabilities.importer import OvalImporter +from vulnerabilities.utils import get_http_headers logger = logging.getLogger(__name__) @@ -77,7 +78,7 @@ def _fetch(self): file_url = f"{base_url}/com.ubuntu.{release}.cve.oval.xml.bz2" # nopep8 self.data_url = file_url logger.info(f"Fetching Ubuntu Oval: {file_url}") - response = requests.get(file_url) + response = requests.get(file_url, headers=get_http_headers()) if response.status_code != requests.codes.ok: logger.error( f"Failed to fetch Ubuntu Oval: HTTP {response.status_code} : {file_url}" diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index f90d42401..b03534997 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -44,6 +44,20 @@ logger = logging.getLogger(__name__) +# User-Agent string for all HTTP requests made by VulnerableCode +VULNERABLECODE_USER_AGENT = "VulnerableCode/37.0.0 (https://github.com/aboutcode-org/vulnerablecode)" + + +def get_http_headers(extra_headers=None): + """ + Return HTTP headers with the VulnerableCode User-Agent. + Optionally merge with extra_headers if provided. + """ + headers = {"User-Agent": VULNERABLECODE_USER_AGENT} + if extra_headers: + headers.update(extra_headers) + return headers + cve_regex = re.compile(r"CVE-[0-9]{4}-[0-9]{4,19}", re.IGNORECASE) is_cve = cve_regex.match find_all_cve = cve_regex.findall @@ -75,7 +89,7 @@ def load_toml(path): def fetch_yaml(url): - response = requests.get(url) + response = requests.get(url, headers=get_http_headers()) return saneyaml.load(response.content) @@ -113,7 +127,7 @@ def contains_alpha(string): def requests_with_5xx_retry(max_retries=5, backoff_factor=0.5): """ Returns a requests sessions which retries on 5xx errors with - a backoff_factor + a backoff_factor. The session includes the VulnerableCode User-Agent header. """ retries = urllib3.Retry( total=max_retries, @@ -123,6 +137,7 @@ def requests_with_5xx_retry(max_retries=5, backoff_factor=0.5): ) adapter = requests.adapters.HTTPAdapter(max_retries=retries) session = requests.Session() + session.headers.update(get_http_headers()) session.mount("https://", adapter) session.mount("http://", adapter) return session @@ -284,7 +299,7 @@ def _get_gh_response(gh_token, graphql_query): Convenience function to easy mocking in tests """ endpoint = "https://api.github.com/graphql" - headers = {"Authorization": f"bearer {gh_token}"} + headers = get_http_headers({"Authorization": f"bearer {gh_token}"}) try: return requests.post(endpoint, headers=headers, json=graphql_query).json() except Exception as e: @@ -390,7 +405,7 @@ def fetch_response(url): Fetch and return `response` from the `url` """ try: - response = requests.get(url) + response = requests.get(url, headers=get_http_headers()) if response.status_code == HTTPStatus.OK: return response raise Exception(