Mercurial > farol > farolluz

# -*- coding: utf-8 -*-
# Description:
# Methods for parsing CVRF documents
#
# Authors:
# Benoît Allard <benoit.allard@greenbone.net>
#
# Copyright:
# Copyright (C) 2014 Greenbone Networks GmbH
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

"""\
Methods for parsing of CVRF Documents
"""

from __future__ import print_function

import re
import textwrap
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta

try:
    from datetime import timezone
except ImportError:
    from ..py2 import FixedTimeZone as timezone

from ..cvrf import (CVRF, CVRFPublisher, CVRFTracking, CVRFRevision, CVRFNote,
    CVRFAcknowledgment, CVRFProductBranch, CVRFFullProductName, CVRFGenerator,
    CVRFRelationship, CVRFVulnerability, CVRFVulnerabilityID, CVRFThreat,
    CVRFProductStatus, CVRFCVSSSet, CVRFReference, CVRFRemediation, CVRFGroup,
    CVRFInvolvement, CVRFCWE, CVRFTrackingID, CVRFAggregateSeverity)

# XML namespace URIs, keyed by the short prefix that UN() takes as its first
# argument. The three icasi.org entries are the CVRF 1.1 schema namespaces
# (document, product tree, vulnerability); 'xml' is the reserved W3C XML
# namespace.
NAMESPACES = {
    'cvrf': "http://www.icasi.org/CVRF/schema/cvrf/1.1",
    'prod': "http://www.icasi.org/CVRF/schema/prod/1.1",
    'vuln': "http://www.icasi.org/CVRF/schema/vuln/1.1",
    'xml':  "http://www.w3.org/XML/1998/namespace",
}


def UN(ns, name):
    """ UN for Universal Name

    Returns ``name`` qualified as ``{namespace-uri}name``, the form
    ElementTree uses for tags; ``ns`` is a key of NAMESPACES (an unknown
    key raises KeyError).
    """
    uri = NAMESPACES[ns]
    return "{%s}%s" % (uri, name)


def parseVersion(string):
    """ Turn a dotted version string such as '1.2.3' into a tuple of ints.

    Each dot-separated component must be accepted by int(); otherwise a
    ValueError is raised.
    """
    return tuple(map(int, string.split('.')))


def parseDate(string):
    """ Parse a ``YYYY-MM-DDTHH:MM:SS`` timestamp into an aware datetime.

    The timestamp may be followed by a fractional-seconds part (accepted
    but discarded), and then by either ``Z`` or a ``+HH:MM`` / ``-HH:MM``
    offset. When no zone is given, UTC is assumed. Only the start of
    ``string`` is matched; anything after the recognised part is ignored.

    Returns a datetime whose tzinfo is a fixed-offset ``timezone``.
    Raises ValueError when ``string`` does not start with a timestamp of
    that shape (or when a field is out of range for datetime).
    """
    m = re.match(
        r'(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})'
        # Skip fractional seconds so that a following offset is still seen
        # instead of being silently treated as UTC.
        r'(?:\.\d+)?'
        r'(?:([+-])(\d{2}):(\d{2})|(Z))?',
        string)
    if m is None:
        raise ValueError('Invalid date: %r' % (string,))

    sign = m.group(7)
    if sign is None:
        # Either an explicit 'Z' or no zone at all: both mean UTC here.
        offset = timedelta(0)
    else:
        offset = timedelta(hours=int(m.group(8)), minutes=int(m.group(9)))
        if sign == '-':
            # The sign applies to the whole offset, minutes included:
            # '-05:30' is 5h30 behind UTC, not 4h30.
            offset = -offset

    year, month, day, hour, minute, second = (int(g) for g in m.groups()[:6])
    return datetime(year, month, day, hour, minute, second,
                    tzinfo=timezone(offset))


def parseNote(elem):
    """ Build a CVRFNote from a Note element.

    The 'Type' and 'Ordinal' attributes are required (Ordinal is converted
    to int); 'Title' and 'Audience' are optional and passed as None when
    absent. The element text is dedented and stripped.
    """
    attrs = elem.attrib
    kind = attrs['Type']
    ordinal = int(attrs['Ordinal'])
    body = textwrap.dedent(elem.text).strip()
    return CVRFNote(kind, ordinal, body, attrs.get('Title'), attrs.get('Audience'))


def parseReference(elem, ns='cvrf'):
    """ Build a CVRFReference from a Reference element.

    ns is the current namespace, i.e. the NAMESPACES prefix under which
    the URL and Description children are looked up. Both children are
    expected to be present; the 'Type' attribute is optional.
    """
    url = elem.findtext(UN(ns, 'URL')).strip()
    description = textwrap.dedent(elem.findtext(UN(ns, 'Description'))).strip()
    return CVRFReference(url, description, elem.attrib.get('Type'))


def parseAcknowledgment(elem, ns='cvrf'):
    """ Build a CVRFAcknowledgment from an Acknowledgment element.

    Collects the stripped text of every Name and Organization child found
    under the namespace prefix ns; Description and URL are passed on as
    returned by findtext (None when missing, not stripped).
    """
    names = [node.text.strip() for node in elem.findall(UN(ns, 'Name'))]
    orgs = [node.text.strip() for node in elem.findall(UN(ns, 'Organization'))]
    description = elem.findtext(UN(ns, 'Description'))
    url = elem.findtext(UN(ns, 'URL'))
    return CVRFAcknowledgment(names, orgs, description, url)


def parseFullProductName(elem, parent):
    """ Build a CVRFFullProductName from a FullProductName element.

    The 'ProductID' attribute is required, 'CPE' is optional. parent is
    handed to the CVRFFullProductName constructor unchanged.
    """
    product_id = elem.attrib['ProductID']
    label = elem.text.strip()
    return CVRFFullProductName(product_id, label, parent,
                               cpe=elem.attrib.get('CPE'))


def parseProdBranch(elem, ptree, parentbranch=None):
    """ Recursively parses the branches and the terminal productnames

    This is a generator: it yields one CVRFProductBranch per direct Branch
    child of elem, each with its ``_childs`` already filled in. Products
    found along the way are registered through ptree.addProduct(). Nothing
    happens until the generator is iterated.
    """
    at_root = parentbranch is None
    leaf = elem.find(UN('prod', 'FullProductName'))
    # Don't process the products at the root of the tree
    if not at_root and leaf is not None:
        ptree.addProduct(parseFullProductName(leaf, parentbranch))

    # Top-level branches get the product tree itself as their parent.
    owner = ptree if at_root else parentbranch
    for node in elem.findall(UN('prod', 'Branch')):
        branch = CVRFProductBranch(node.attrib['Type'], node.attrib['Name'], owner)
        # Descend before yielding, so the branch is complete when handed out.
        branch._childs = list(parseProdBranch(node, ptree, branch))
        yield branch


def parseVulnerability(elem):
    """ Build a CVRFVulnerability from a Vulnerability element.

    The 'Ordinal' attribute is required. Everything else is read from
    children in the 'vuln' namespace and only set when present: title, ID,
    notes, discovery/release dates, involvements, CVE, CWEs, product
    statuses, threats, CVSS score sets, remediations, references and
    acknowledgments.
    """
    def tag(name):
        # Qualified tag name in the 'vuln' namespace.
        return UN('vuln', name)

    def nested(parent, container, item):
        # All <item> elements sitting below <container> children of parent.
        return parent.findall('/'.join([tag(container), tag(item)]))

    vuln = CVRFVulnerability(int(elem.attrib['Ordinal']))

    title = elem.findtext(tag('Title'))
    if title is not None:
        vuln.setTitle(title.strip())

    idnode = elem.find(tag('ID'))
    if idnode is not None:
        system = idnode.attrib['SystemName']
        vuln.setID(CVRFVulnerabilityID(system, idnode.text.strip()))

    for notenode in nested(elem, 'Notes', 'Note'):
        vuln.addNote(parseNote(notenode))

    discovered = elem.findtext(tag('DiscoveryDate'))
    if discovered is not None:
        vuln.setDiscoveryDate(parseDate(discovered))
    released = elem.findtext(tag('ReleaseDate'))
    if released is not None:
        vuln.setReleaseDate(parseDate(released))

    for invnode in nested(elem, 'Involvements', 'Involvement'):
        involvement = CVRFInvolvement(invnode.attrib['Party'],
                                      invnode.attrib['Status'])
        description = invnode.findtext(tag('Description'))
        if description is not None:
            involvement.setDescription(textwrap.dedent(description).strip())
        vuln.addInvolvement(involvement)

    cve = elem.findtext(tag('CVE'))
    if cve is not None:
        vuln.setCVE(cve.strip())

    for cwenode in elem.findall(tag('CWE')):
        cwe_id = cwenode.attrib['ID']
        vuln.addCWE(CVRFCWE(cwe_id, cwenode.text.strip()))

    for statusnode in nested(elem, 'ProductStatuses', 'Status'):
        status = CVRFProductStatus(statusnode.attrib['Type'])
        for pidnode in statusnode.findall(tag('ProductID')):
            status.addProductID(pidnode.text.strip())

        vuln.addProductStatus(status)

    for threatnode in nested(elem, 'Threats', 'Threat'):
        kind = threatnode.attrib['Type']
        # A Description child is expected here; there is no None guard.
        description = textwrap.dedent(threatnode.findtext(tag('Description'))).strip()
        threat = CVRFThreat(kind, description)
        when = threatnode.findtext(tag('Date'))
        if when is not None:
            threat.setDate(parseDate(when))
        for pidnode in threatnode.findall(tag('ProductID')):
            threat.addProductID(pidnode.text.strip())
        for gidnode in threatnode.findall(tag('GroupID')):
            threat.addGroupID(gidnode.text.strip())

        vuln.addThreat(threat)

    for scorenode in nested(elem, 'CVSSScoreSets', 'ScoreSet'):
        base = float(scorenode.findtext(tag('BaseScore')).strip())
        cvss_set = CVRFCVSSSet(base)
        temporal = scorenode.findtext(tag('TemporalScore'))
        if temporal is not None:
            cvss_set.setTemporalScore(float(temporal.strip()))
        environmental = scorenode.findtext(tag('EnvironmentalScore'))
        if environmental is not None:
            cvss_set.setEnvironmentalScore(float(environmental.strip()))
        vector = scorenode.findtext(tag('Vector'))
        if vector is not None:
            cvss_set.setVector(vector.strip())
        for pidnode in scorenode.findall(tag('ProductID')):
            cvss_set.addProductID(pidnode.text.strip())

        vuln.addCVSSSet(cvss_set)

    for remnode in nested(elem, 'Remediations', 'Remediation'):
        kind = remnode.attrib['Type']
        # As for threats, the Description child is expected to exist.
        description = textwrap.dedent(remnode.findtext(tag('Description'))).strip()
        remediation = CVRFRemediation(kind, description)
        when = remnode.findtext(tag('Date'))
        if when is not None:
            remediation.setDate(parseDate(when))
        entitlement = remnode.findtext(tag('Entitlement'))
        if entitlement is not None:
            remediation.setEntitlement(textwrap.dedent(entitlement).strip())
        url = remnode.findtext(tag('URL'))
        if url is not None:
            remediation.setURL(url.strip())
        for pidnode in remnode.findall(tag('ProductID')):
            remediation.addProductID(pidnode.text.strip())
        for gidnode in remnode.findall(tag('GroupID')):
            remediation.addGroupID(gidnode.text.strip())

        vuln.addRemediation(remediation)

    for refnode in nested(elem, 'References', 'Reference'):
        vuln.addReference(parseReference(refnode, 'vuln'))

    for acknode in nested(elem, 'Acknowledgments', 'Acknowledgment'):
        vuln.addAcknowledgment(parseAcknowledgment(acknode, 'vuln'))

    return vuln


def parse(xml):
    """ Parse a CVRF document and return it as a CVRF object.

    xml is either the document content itself or any object with a read()
    method returning it. Raises ValueError when the root element is not
    ``cvrfdoc`` in the 'cvrf' namespace. DocumentTitle and DocumentType
    are expected to be present; the publisher, tracking data, notes,
    distribution, aggregate severity, references, acknowledgments, product
    tree and vulnerabilities are added when found.
    """
    def cvrf(name):
        # Qualified tag name in the 'cvrf' namespace.
        return UN('cvrf', name)

    def prod(name):
        # Qualified tag name in the 'prod' namespace.
        return UN('prod', name)

    def below(container, item):
        # ElementTree path selecting item under container.
        return '/'.join([container, item])

    source = xml.read() if hasattr(xml, 'read') else xml
    root = ET.fromstring(source)
    if root.tag != cvrf('cvrfdoc'):
        raise ValueError('Not a CVRF document !')

    title = root.findtext(cvrf('DocumentTitle')).strip()
    doctype = root.findtext(cvrf('DocumentType')).strip()
    doc = CVRF(title, doctype)

    pubnode = root.find(cvrf('DocumentPublisher'))
    if pubnode is not None:
        publisher = CVRFPublisher(pubnode.attrib['Type'],
                                  pubnode.attrib.get('VendorID'))
        doc.setPublisher(publisher)
        contactnode = pubnode.find(cvrf('ContactDetails'))
        if contactnode is not None:
            publisher.setContact(contactnode.text.strip())
        authoritynode = pubnode.find(cvrf('IssuingAuthority'))
        if authoritynode is not None:
            publisher.setAuthority(authoritynode.text.strip())

    tracknode = root.find(cvrf('DocumentTracking'))
    if tracknode is not None:
        ident_id = tracknode.findtext(below(cvrf('Identification'), cvrf('ID')))
        identification = CVRFTrackingID(ident_id.strip())
        for aliasnode in tracknode.findall(below(cvrf('Identification'), cvrf('Alias'))):
            identification.addAlias(aliasnode.text.strip())

        status = tracknode.findtext(cvrf('Status')).strip()
        version = parseVersion(tracknode.findtext(cvrf('Version')).strip())
        initial = parseDate(tracknode.findtext(cvrf('InitialReleaseDate')).strip())
        current = parseDate(tracknode.findtext(cvrf('CurrentReleaseDate')).strip())
        tracking = CVRFTracking(identification, status, version, initial, current)
        doc.setTracking(tracking)

        for revnode in tracknode.findall(below(cvrf('RevisionHistory'), cvrf('Revision'))):
            number = parseVersion(revnode.findtext(cvrf('Number')).strip())
            revdate = parseDate(revnode.findtext(cvrf('Date')).strip())
            summary = revnode.findtext(cvrf('Description')).strip()
            tracking.addRevision(CVRFRevision(number, revdate, summary))

        gennode = tracknode.find(cvrf('Generator'))
        if gennode is not None:
            generator = CVRFGenerator()
            engine = gennode.findtext(cvrf('Engine'))
            if engine is not None:
                generator.setEngine(engine.strip())
            gendate = gennode.findtext(cvrf('Date'))
            if gendate is not None:
                generator.setDate(parseDate(gendate.strip()))
            tracking.setGenerator(generator)

    for notenode in root.findall(below(cvrf('DocumentNotes'), cvrf('Note'))):
        doc.addNote(parseNote(notenode))

    distribution = root.findtext(cvrf('DocumentDistribution'))
    if distribution is not None:
        doc.setDistribution(textwrap.dedent(distribution).strip())

    # This is in a quite free format, not sure how to do something with it ...
    sevnode = root.find(cvrf('AggregateSeverity'))
    if sevnode is not None:
        severity = CVRFAggregateSeverity(sevnode.text.strip())
        if 'Namespace' in sevnode.attrib:
            severity.setNamespace(sevnode.attrib['Namespace'])
        doc.setAggregateSeverity(severity)

    for refnode in root.findall(below(cvrf('DocumentReferences'), cvrf('Reference'))):
        doc.addReference(parseReference(refnode))

    for acknode in root.findall(below(cvrf('Acknowledgments'), cvrf('Acknowledgment'))):
        doc.addAcknowledgment(parseAcknowledgment(acknode))

    # --- The ProductTree

    treenode = root.find(prod('ProductTree'))
    if treenode is not None:
        tree = doc.createProductTree()
        for branch in parseProdBranch(treenode, tree):
            tree.addBranch(branch)

        # Products declared directly under the tree root have the tree
        # itself as parent.
        for fpnnode in treenode.findall(prod('FullProductName')):
            tree.addProduct(parseFullProductName(fpnnode, tree))

        for relnode in treenode.findall(prod('Relationship')):
            relationship = CVRFRelationship(
                relnode.attrib['ProductReference'],
                relnode.attrib['RelationType'],
                relnode.attrib['RelatesToProductReference']
            )
            tree.addRelationship(relationship)
            # Only the first FullProductName of a relationship is read.
            related = relnode.find(prod('FullProductName'))
            tree.addProduct(parseFullProductName(related, relationship))

        for groupnode in treenode.findall(below(prod('ProductGroups'), prod('Group'))):
            group = CVRFGroup(groupnode.attrib['GroupID'])
            description = groupnode.findtext(prod('Description'))
            if description is not None:
                group.setDescription(textwrap.dedent(description).strip())
            for pidnode in groupnode.findall(prod('ProductID')):
                group.addProductID(pidnode.text.strip())
            tree.addGroup(group)

    # --- The Vulnerabilities

    for vulnnode in root.findall(UN('vuln', 'Vulnerability')):
        doc.addVulnerability(parseVulnerability(vulnnode))

    return doc


if __name__ == "__main__":
    # Manual smoke test: parse the file named by the first command-line
    # argument, validate it and dump a few of its parts.
    import sys
    with open(sys.argv[1], 'rt') as handle:
        document = parse(handle)
    document.validate()
    print(document)
    print(document.getHighestCVSS()._vector)
    print(document.getProductList())
    print(document._producttree._branches)
#    print(document._producttree._branches[0]._childs)
author	Benoît Allard <benoit.allard@greenbone.net>
date	Wed, 08 Oct 2014 12:44:20 +0200
parents	d62264a643fb
children	dcc946b30343