# -*- coding: utf-8 -*-

# Copyright 2005 Lars Wirzenius (liw@iki.fi)
# Copyright © 2012 Andreas Beckmann (anbe@debian.org)
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program.  If not, see <https://www.gnu.org/licenses/>

"""Parser for Debian package relationship strings

This module contains the class DependencyParser, which parses Debian
package relationship strings (e.g., the Depends header). The class raises
the DependencySyntaxError exception on syntactic errors. The result
uses SimpleDependency objects.

Lars Wirzenius <liw@iki.fi>
"""

import re


class DependencySyntaxError(Exception):

    """Syntax error in package dependency declaration"""

    def __init__(self, msg, cursor):
        """Build the error text from a message and the failing cursor.

        msg is a short description of what was expected; cursor is the
        _Cursor positioned where parsing failed.  The position, the next
        ten characters and the full input are embedded in the message.
        """
        self._msg = "Error: %s: %s (text at error: '%s', full text being parsed: '%s')" % (
            cursor.get_position(),
            msg,
            cursor.get_text(10),
            cursor.get_full_text(),
        )

    def __str__(self):
        return self._msg

    def __repr__(self):
        return self._msg


class _Cursor:

    """Store an input string and a movable location in it"""

    def __init__(self, myinput):
        self._input = myinput
        self._len = len(self._input)
        self._pos = 0

    def skip_whitespace(self):
        """Advance past any whitespace at the current position"""
        while self._pos < self._len and self._input[self._pos].isspace():
            self.mynext()

    def at_end(self):
        """Are we at the end of the input?

        Note that this skips whitespace first, so trailing whitespace
        counts as "end of input".
        """
        self.skip_whitespace()
        return self._pos >= self._len

    def mynext(self):
        """Move to the next character (no-op at end of input)"""
        if self._pos < self._len:
            self._pos += 1

    def get_char(self):
        """Return current character, None if at end"""
        if self._pos >= self._len:
            return None
        return self._input[self._pos]

    def get_full_text(self):
        """Return the complete input string"""
        return self._input

    def get_text(self, length):
        """Return up to length characters from the current position"""
        if self._pos >= self._len:
            return ""
        return self._input[self._pos:self._pos + length]

    def match(self, regexp):
        """Match a regular expression against the current position

        regexp is a compiled pattern.  The cursor is advanced by the
        length of the match, if any.  Returns the match object, or None
        if the pattern does not match at the current position.
        """
        m = regexp.match(self._input[self._pos:])
        if m:
            self._pos += len(m.group())
        return m

    def match_literal(self, literal):
        """Match a literal string against the current position.

        Return True and move position if there is a match, else
        return False.
        """
        if self.get_text(len(literal)) == literal:
            self._pos += len(literal)
            return True
        return False

    def get_position(self):
        """Return current position, as string"""
        return "pos %d" % self._pos


class SimpleDependency:

    """Express simple dependency towards another package

    Attributes:
        name      package name (without any ":any"-style suffix)
        operator  version relation operator, or None if unversioned
        version   version string, or None if unversioned
        arch      list of architecture names (each possibly prefixed
                  with "!"), or None if there is no restriction
    """

    def __init__(self, name, operator, version, arch):
        self.name = name
        self.operator = operator
        self.version = version
        self.arch = arch

    def __repr__(self):
        # The format string needs one placeholder per field; an empty
        # format string would make the % operator raise TypeError.
        return "<DEP: %s, %s, %s, %s>" % (
            self.name,
            self.operator,
            self.version,
            self.arch,
        )


class DependencyParser:

    """Parse Debian package relationship strings

    Debian packages have a rich language for expressing their
    relationships. See the Debian Policy Manual, chapter 7 ("Declaring
    relationships between packages"). This Python module implements a
    parser for strings expressing such relationships.

    Syntax of dependency fields (Pre-Depends, Depends, Recommends,
    Suggests, Conflicts, Provides, Replaces, Enhances, Build-Depends,
    Build-Depends-Indep, Build-Conflicts, Build-Conflicts-Indep), in a
    BNF-like form:

        depends-field ::= EMPTY | dependency ("," dependency)*
        dependency ::= possible-dependency ("|" possible-dependency)*
        possible-dependency ::= package-name version-dependency?
                                arch-restriction?
        version-dependency ::= "(" relative-operator version-number ")"
        relative-operator ::= "<<" | "<=" | "=" | ">=" | ">>" | "<" | ">"
        version-number ::= epoch? upstream-version debian-revision?
        arch-restriction ::= "[" arch-name arch-name* "]" |
                             "[" "!" arch-name ("!" arch-name)* "]"
        package-name ::= alphanumeric name-char name-char* ":any"?
        epoch ::= integer ":"
        upstream-version ::= alphanumeric version-char*
            -- policy says "should start with digit", but not all
               packages do
        debian-revision ::= "-" debian-version-char debian-version-char*
        arch-name ::= alphanumeric alphanumeric*
        EMPTY ::= ""
        integer ::= digit digit*
        alphanumeric ::= "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" |
                         "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" |
                         "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" |
                         "y" | "z" | digit
        digit ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
        name-char ::= alphanumeric | "+" | "-" | "." | "_"
        version-char ::= alphanumeric | "." | "+" | "-" | ":" | "~"
        debian-version-char ::= alphanumeric | "." | "+"

    White space can occur between any tokens except inside package-name,
    version-number, or arch-name. Some of the headers restrict the syntax
    somewhat, e.g., Provides does not allow version-dependency, but this is
    not included in the syntax for simplicity.

    Note: Added "_" to name-char, because some packages (type-handling
    in particular) use Provides: headers with bogus package names.

    Note: Added upper case letters to name pattern, since some of the
    Mozilla localization packages use or used them.
    """

    def __init__(self, input_string):
        """Parse input_string immediately.

        Raises DependencySyntaxError if the string is not a valid
        relationship field.
        """
        self._cursor = _Cursor(input_string)
        self._list = self._parse_dependencies()

    def get_dependencies(self):
        """Return parsed dependencies

        The result is a list of lists of SimpleDependency objects.
        Let's try that again. The result is a list of dependencies,
        corresponding to the comma-separated items in the dependency list.
        Each dependency is also a list of SimpleDependency objects,
        representing alternative ways to fulfill the dependency; in other
        words, items separated by the vertical bar (|).

        For example, "foo, bar | foobar" would result in the following
        list: [[foo], [bar, foobar]].
        """
        return self._list

    def _parse_dependencies(self):
        """Parse the whole field: dependency ("," dependency)*

        Returns a list of alternative-lists.  An empty dependency (for
        instance after a trailing comma) ends the list.
        """
        vlist = []
        dep = self._parse_dependency()
        while dep:
            vlist.append(dep)
            # at_end() skips whitespace itself.
            if self._cursor.at_end():
                break
            if not self._cursor.match_literal(","):
                raise DependencySyntaxError("Expected comma", self._cursor)
            dep = self._parse_dependency()
        return vlist

    def _parse_dependency(self):
        """Parse one comma-separated item: alternatives separated by "|"

        Returns a (possibly empty) list of SimpleDependency objects.
        """
        vlist = []
        dep = self._parse_possible_dependency()
        while dep:
            vlist.append(dep)
            self._cursor.skip_whitespace()
            if not self._cursor.match_literal("|"):
                break
            dep = self._parse_possible_dependency()
        return vlist

    def _parse_possible_dependency(self):
        """Parse: package-name version-dependency? arch-restriction?

        Returns a SimpleDependency, or None if the input is exhausted.
        """
        name = self._parse_package_name()
        if not name:
            return None
        (op, version) = self._parse_version_dependency()
        arch = self._parse_arch_restriction()
        return SimpleDependency(name, op, version, arch)

    _name_pat = re.compile(r"[a-zA-Z0-9][a-zA-Z0-9+._-]+")
    # The MultiArch spec supports an ":any" modifier.  Loosen the
    # accepted arch's, to avoid crashing.
    _any_suffix_pat = re.compile(r":[a-zA-Z0-9-]+")

    def _parse_package_name(self):
        """Parse a package name, returning it or None at end of input.

        A trailing ":<qualifier>" (e.g. ":any") is consumed but not
        included in the returned name.  Raises DependencySyntaxError if
        the input does not start with a valid name.
        """
        self._cursor.skip_whitespace()
        if self._cursor.at_end():
            return None
        m = self._cursor.match(self._name_pat)
        if not m:
            raise DependencySyntaxError("Expected a package name", self._cursor)
        # Swallow an optional multiarch qualifier; the result is
        # deliberately ignored.
        self._cursor.match(self._any_suffix_pat)
        return m.group()

    _op_pat = re.compile(r"(<<|<=|=|>=|>>|<(?![<=])|>(?![>=]))")
    _version_pat = re.compile(
        r"(?P<epoch>\d+:)?"
        + r"(?P<upstream>[a-zA-Z0-9+][a-zA-Z0-9.+:~-]*)"
        + r"(?P<debian>-[a-zA-Z0-9.+]+)?"
    )

    def _parse_version_dependency(self):
        """Parse an optional "(" relative-operator version-number ")"

        Returns a tuple (operator, version), or (None, None) when no
        version dependency is present.  The deprecated operators "<" and
        ">" are normalised to "<=" and ">=" respectively.  Raises
        DependencySyntaxError on a malformed version dependency.
        """
        self._cursor.skip_whitespace()
        if self._cursor.get_char() != "(":
            return None, None
        self._cursor.mynext()

        self._cursor.skip_whitespace()
        opm = self._cursor.match(self._op_pat)
        if not opm:
            raise DependencySyntaxError("Expected a version relation " + "operator", self._cursor)
        operator = opm.group()
        if operator == "<":
            operator = "<="
        elif operator == ">":
            operator = ">="

        self._cursor.skip_whitespace()
        verm = self._cursor.match(self._version_pat)
        if not verm:
            raise DependencySyntaxError("Expected a version number", self._cursor)

        self._cursor.skip_whitespace()
        if self._cursor.get_char() != ")":
            raise DependencySyntaxError("Expected ')'", self._cursor)
        self._cursor.mynext()

        # Return the normalised operator, not the raw match text.
        return operator, verm.group()

    _arch_pat = re.compile(r"!?[a-zA-Z0-9-]+")

    def _parse_arch_restriction(self):
        """Parse an optional "[" arch-name ... "]" restriction

        Returns the list of architecture names (each keeps its leading
        "!" if present), or None when there is no restriction.  Raises
        DependencySyntaxError on an invalid or unterminated list.
        """
        self._cursor.skip_whitespace()
        if self._cursor.get_char() != "[":
            return None
        # The position lives on the cursor, not on the parser.
        self._cursor.mynext()

        vlist = []
        while True:
            self._cursor.skip_whitespace()
            if self._cursor.get_char() == "]":
                self._cursor.mynext()
                break
            m = self._cursor.match(self._arch_pat)
            if not m:
                raise DependencySyntaxError("Expected architecture name", self._cursor)
            vlist.append(m.group())
        return vlist


# vi:set et ts=4 sw=4 :