view farolluz/parsers/cpe.py @ 41:bb1dd2a55643

CPE: Add a generic 'parse' method
author Benoît Allard <benoit.allard@greenbone.net>
date Mon, 29 Dec 2014 14:58:29 +0100
parents 1d9b2b06067e
children 3826f2701ff2
line wrap: on
line source
# -*- coding: utf-8 -*-
# Description:
# Methods for parsing CPEs
#
# Authors:
# Benoît Allard <benoit.allard@greenbone.net>
#
# Copyright:
# Copyright (C) 2014 Greenbone Networks GmbH
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

"""\
a cpe class to ease the creation of a producttree based on cpe

This is based on:

    NIST Interagency Report 7695
    Common Platform Enumeration: Naming Specification Version 2.3

CPE is a trademark of The MITRE Corporation.

"""

import re

# Percent-encoded form of the printable non-alphanumeric characters.
# '-' and '.' are not listed: pct_encode() returns them unchanged.
PCT_MAP = {'!': "%21", '"': "%22", '#': "%23", '$': "%24", '%': "%25", '&': "%26",
           "'": "%27", '(': "%28", ')': "%29", '*': "%2a", '+': "%2b", ',': "%2c",
           '/': "%2f", ':': "%3a", ';': "%3b", '<': "%3c", "=": "%3d", '>': "%3e",
           '?': "%3f", '@': "%40", '[': "%5b", '\\': "%5c", "]": "%5d", '^': "%5e",
           '`': "%60", '{': "%7b", '|': "%7c", '}': "%7d", "~": "%7e"}

# Reverse mapping (percent-encoded form -> character), used by decode().
# items() is used rather than iteritems() as the latter only exists on
# Python 2, which made this module fail at import time on Python 3.
PCT_MAP_i = dict((v, k) for k, v in PCT_MAP.items())

def pct_encode(c):
    """ Returns the right percent-encoding of c

    '-' and '.' are returned unchanged; every other character is looked
    up in PCT_MAP, so a character without an entry there raises KeyError.
    """
    if c in "-.":
        return c
    return PCT_MAP[c]

def decode(s):
    """ Decodes one component of a CPE URI into a CPEAttribute

    An empty component gives ANY and a single '-' gives NA. Anything
    else is lowercased and scanned: "%01" is decoded to the unquoted
    special character '?', "%02" to the unquoted '*', and every other
    percent-encoded form to the matching character quoted with a
    backslash. The '.', '-' and '~' characters are quoted as well, all
    other characters are copied unchanged.

    "%02" is only accepted at the very beginning or end of the
    component; "%01" is only accepted there or as part of a leading or
    trailing run of "%01".

    Raises ValueError for a misplaced "%01" or "%02", and KeyError for
    a percent-encoded form that has no entry in PCT_MAP_i.
    """
    if s == '':
        return ANY
    if s == '-':
        return NA
    s = s.lower()
    # Index at which a three-character form ends the string
    last_form = len(s) - 3
    res = []
    idx = 0
    # True once something else than a "%01" has been seen
    embedded = False
    while idx < len(s):
        c = s[idx]
        if c in ".-~":
            res.append("\\" + c)
            embedded = True
            idx += 1
            continue
        if c != '%':
            res.append(c)
            embedded = True
            idx += 1
            continue
        form = s[idx:idx+3]
        if form == "%01":
            if not ((idx == 0) or (idx == last_form) or
                    # Leading run: the three previous characters are a "%01"
                    (not embedded and (s[idx-3:idx] == "%01")) or
                    # Trailing run: the three next characters are a "%01"
                    (embedded and (s[idx+3:idx+6] == "%01"))):
                raise ValueError("misplaced %s in %r" % (form, s))
            res.append('?')
            # 'embedded' is left untouched on purpose: a "%01" must not
            # end the leading run it is part of.
        elif form == "%02":
            if not ((idx == 0) or (idx == last_form)):
                raise ValueError("misplaced %s in %r" % (form, s))
            res.append('*')
            embedded = True
        else:
            res.append('\\' + PCT_MAP_i[form])
            embedded = True
        idx += 3
    return CPEAttribute("".join(res))

def unbind_value_fs(s):
    """ Turns one component of a formatted string into a CPEAttribute

    '*' gives ANY and '-' gives NA. For any other component, letters,
    digits and '_' are copied, an already quoted character stays quoted,
    and every other character gets quoted with a backslash, except for
    the special characters '*' and '?', which stay unquoted.

    Raises ValueError when an unquoted '*' is not the first or last
    character, or when an unquoted '?' is neither first, last, nor
    accepted as part of a run of '?'.
    """
    if s == '*':
        return ANY
    if s == '-':
        return NA
    is_plain = re.compile("[a-zA-Z0-9_]").match
    last = len(s) - 1
    pieces = []
    # Tells which neighbour of an inner '?' has to be a '?' as well
    seen_regular = False
    pos = 0
    while pos <= last:
        char = s[pos]
        at_edge = pos in (0, last)
        if char == "\\":
            # Copy the backslash together with the character it quotes
            pieces.append(s[pos:pos + 2])
            seen_regular = True
            pos += 2
            continue
        if char == "*":
            if not at_edge:
                raise ValueError
            pieces.append(char)
            seen_regular = True
        elif char == "?":
            if at_edge:
                allowed = True
            elif seen_regular:
                allowed = s[pos + 1] == "?"
            else:
                allowed = s[pos - 1] == "?"
            if not allowed:
                raise ValueError
            pieces.append(char)
            seen_regular = False
        elif is_plain(char) is not None:
            pieces.append(char)
            seen_regular = True
        else:
            pieces.append("\\" + char)
            seen_regular = True
        pos += 1
    return CPEAttribute("".join(pieces))

class CPEAttribute(object):
    """ Value of a single CPE attribute: ANY, NA or a quoted string

    The 'any' and 'na' flags are checked, in that order, before 'value'
    is looked at when the attribute gets bound.
    """

    def __init__(self, value=None, any=False, na=False):
        self.value = value
        self.na = na
        self.any = any

    def bind_for_URI(self):
        """ Returns this attribute as a component of a CPE URI """
        if not self.any and not self.na:
            return self.transform_for_uri()
        return "" if self.any else '-'

    def transform_for_uri(self):
        """ Returns the string value converted for use in a CPE URI

        Letters, digits and '_' are copied, a quoted character is
        replaced by its percent-encoding, the unquoted '?' and '*' turn
        into "%01" and "%02"; any other character is dropped.
        """
        text = self.value
        end = len(text)
        pieces = []
        pos = 0
        while pos < end:
            char = text[pos]
            pos += 1
            if char == '\\':
                # The character after the backslash is the one to encode
                pieces.append(pct_encode(text[pos]))
                pos += 1
            elif char == '?':
                pieces.append("%01")
            elif char == '*':
                pieces.append("%02")
            elif re.match("[a-zA-Z0-9_]", char) is not None:
                pieces.append(char)
        return "".join(pieces)

    def bind_for_fs(self):
        """ Returns this attribute as a component of a formatted string """
        if not self.any and not self.na:
            return self.process_quoted_chars()
        return "*" if self.any else "-"

    def process_quoted_chars(self):
        """ Returns the string value with the quoting of '.', '-', '_' removed

        Any other quoted character keeps its backslash; unquoted
        characters are copied unchanged.
        """
        text = self.value
        end = len(text)
        pieces = []
        pos = 0
        while pos < end:
            char = text[pos]
            pos += 1
            if char != '\\':
                pieces.append(char)
                continue
            quoted = text[pos]
            pos += 1
            pieces.append(quoted if quoted in ".-_" else '\\' + quoted)
        return "".join(pieces)

# Shared instances for the two logical values: decode() and
# unbind_value_fs() return ANY for an unspecified component ('' in a URI,
# '*' in a formatted string) and NA for a '-' component.
ANY = CPEAttribute(any=True)
NA = CPEAttribute(na=True)

class CPE(object):
    """ A CPE name, stored as one CPEAttribute per attribute

    Every attribute that is not given to the constructor is set to a
    CPEAttribute with the 'any' flag. The name can be bound to, or filled
    from, either a URI ('cpe:/...') or a formatted string ('cpe:2.3:...').
    """

    # Attributes in the order they show up in a formatted string, right
    # after the 'cpe' and '2.3' components.
    _FS_ATTRIBUTES = ('part', 'vendor', 'product', 'version', 'update',
                      'edition', 'language', 'sw_edition', 'target_sw',
                      'target_hw', 'other')

    # Position of the plain components of a URI; the part (1) and the
    # edition (6) need a special handling.
    _URI_ATTRIBUTES = {2: 'vendor', 3: 'product', 4: 'version',
                       5: 'update', 7: 'language'}

    def __init__(self, part=None, vendor=None, product=None, version=None, update=None, edition=None, language=None, sw_edition=None, target_sw=None, target_hw=None, other=None):
        self.part = part or CPEAttribute(any=True)
        self.vendor = vendor or CPEAttribute(any=True)
        self.product = product or CPEAttribute(any=True)
        self.version = version or CPEAttribute(any=True)
        self.update = update or CPEAttribute(any=True)
        self.edition = edition or CPEAttribute(any=True)
        self.language = language or CPEAttribute(any=True)
        # Extended attributes:
        self.sw_edition = sw_edition or CPEAttribute(any=True)
        self.target_sw = target_sw or CPEAttribute(any=True)
        self.target_hw = target_hw or CPEAttribute(any=True)
        self.other = other or CPEAttribute(any=True)

    def bind_to_URI(self):
        """ Returns this name as a CPE URI, without trailing colons """
        uri = 'cpe:/'
        uri += ':'.join(a.bind_for_URI() for a in (self.part, self.vendor, self.product, self.version, self.update))
        # Special handling for edition: the extended attributes have no
        # component of their own and get packed with it when one is set.
        ed = self.edition.bind_for_URI()
        sw_ed = self.sw_edition.bind_for_URI()
        t_sw = self.target_sw.bind_for_URI()
        t_hw = self.target_hw.bind_for_URI()
        oth = self.other.bind_for_URI()
        if sw_ed == "" and t_sw == "" and t_hw == "" and oth == "":
            uri += ":" + ed
        else:
            uri += ":~" + '~'.join([ed, sw_ed, t_sw, t_hw, oth])
        uri += ':' + self.language.bind_for_URI()
        return uri.rstrip(':')

    def unbind_URI(self, uri):
        """ Sets the attributes from the components of the URI uri

        Attributes whose component is missing are left untouched, and
        components after the language are ignored. Raises ValueError if
        a packed edition (starting with '~') does not hold exactly five
        fields, and passes on whatever decode() raises.
        """
        for idx, comp in enumerate(uri.split(':')):
            if idx == 1:
                # Drop the first character (the '/' of 'cpe:/')
                self.part = decode(comp[1:])
            elif idx == 6:
                if not comp.startswith('~'):
                    self.edition = decode(comp)
                else:
                    ed, sw_ed, t_sw, t_hw, oth = comp[1:].split('~')
                    self.edition = decode(ed)
                    self.sw_edition = decode(sw_ed)
                    self.target_sw = decode(t_sw)
                    self.target_hw = decode(t_hw)
                    self.other = decode(oth)
            elif idx in self._URI_ATTRIBUTES:
                setattr(self, self._URI_ATTRIBUTES[idx], decode(comp))

    def bind_to_fs(self):
        """ Returns this name as a formatted string ('cpe:2.3:...') """
        fs = 'cpe:2.3:'
        fs += ':'.join(getattr(self, name).bind_for_fs() for name in self._FS_ATTRIBUTES)
        return fs

    @staticmethod
    def _split_fs(fs):
        """ Splits fs on the colons that are not quoted with a backslash """
        comps = []
        current = ""
        idx = 0
        while idx < len(fs):
            c = fs[idx]
            if c == '\\':
                # Keep the backslash and the character it quotes together,
                # so that a quoted colon stays inside its component.
                current += fs[idx:idx+2]
                idx += 2
                continue
            if c == ':':
                comps.append(current)
                current = ""
            else:
                current += c
            idx += 1
        comps.append(current)
        return comps

    def unbind_fs(self, fs):
        """ Sets the attributes from the components of the formatted string fs

        Attributes whose component is missing are left untouched. Every
        component goes through unbind_value_fs(), the two leading ones
        ('cpe' and '2.3') and any extra trailing one included, so the
        ValueError it raises on a malformed component is passed on.
        """
        for idx, comp in enumerate(self._split_fs(fs)):
            v = unbind_value_fs(comp)
            if 2 <= idx < 2 + len(self._FS_ATTRIBUTES):
                setattr(self, self._FS_ATTRIBUTES[idx - 2], v)

def parse(s):
    """ Returns the CPE described by s, a CPE URI or a formatted string

    The binding is picked from the prefix of s: 'cpe:/' for a URI,
    'cpe:2.3:' for a formatted string. Raises ValueError(s) for any
    other prefix.
    """
    cpe = CPE()
    is_uri = s[:5] == 'cpe:/'
    is_fs = s[:8] == 'cpe:2.3:'
    if not (is_uri or is_fs):
        raise ValueError(s)
    if is_uri:
        cpe.unbind_URI(s)
    else:
        cpe.unbind_fs(s)
    return cpe
This site is hosted by Intevation GmbH (Datenschutzerklärung und Impressum | Privacy Policy and Imprint)