alpar@0
|
1 |
%%
|
alpar@0
|
2 |
%% Copyright 2007, 2008, 2009 Elsevier Ltd
|
alpar@0
|
3 |
%%
|
alpar@0
|
4 |
%% This file is part of the 'Elsarticle Bundle'.
|
alpar@0
|
5 |
%% ---------------------------------------------
|
alpar@0
|
6 |
%%
|
alpar@0
|
7 |
%% It may be distributed under the conditions of the LaTeX Project Public
|
alpar@0
|
8 |
%% License, either version 1.2 of this license or (at your option) any
|
alpar@0
|
9 |
%% later version. The latest version of this license is in
|
alpar@0
|
10 |
%% http://www.latex-project.org/lppl.txt
|
alpar@0
|
11 |
%% and version 1.2 or later is part of all distributions of LaTeX
|
alpar@0
|
12 |
%% version 1999/12/01 or later.
|
alpar@0
|
13 |
%%
|
alpar@0
|
14 |
%% The list of all files belonging to the 'Elsarticle Bundle' is
|
alpar@0
|
15 |
%% given in the file `manifest.txt'.
|
alpar@0
|
16 |
%%
|
alpar@0
|
17 |
|
alpar@0
|
18 |
%% Template article for Elsevier's document class `elsarticle'
|
alpar@0
|
19 |
%% with numbered style bibliographic references
|
alpar@0
|
20 |
%% SP 2008/03/01
|
alpar@0
|
21 |
|
alpar@0
|
22 |
\documentclass[preprint,12pt]{elsarticle}
|
alpar@0
|
23 |
|
alpar@0
|
24 |
%% Use the option review to obtain double line spacing
|
alpar@0
|
25 |
%% \documentclass[authoryear,preprint,review,12pt]{elsarticle}
|
alpar@0
|
26 |
|
alpar@0
|
27 |
%% Use the options 1p,twocolumn; 3p; 3p,twocolumn; 5p; or 5p,twocolumn
|
alpar@0
|
28 |
%% for a journal layout:
|
alpar@0
|
29 |
%% \documentclass[final,1p,times]{elsarticle}
|
alpar@0
|
30 |
%% \documentclass[final,1p,times,twocolumn]{elsarticle}
|
alpar@0
|
31 |
%% \documentclass[final,3p,times]{elsarticle}
|
alpar@0
|
32 |
%% \documentclass[final,3p,times,twocolumn]{elsarticle}
|
alpar@0
|
33 |
%% \documentclass[final,5p,times]{elsarticle}
|
alpar@0
|
34 |
%% \documentclass[final,5p,times,twocolumn]{elsarticle}
|
alpar@0
|
35 |
|
alpar@0
|
36 |
%% For including figures, graphicx.sty has been loaded in
|
alpar@0
|
37 |
%% elsarticle.cls. If you prefer to use the old commands
|
alpar@0
|
38 |
%% please give \usepackage{epsfig}
|
alpar@0
|
39 |
|
alpar@0
|
40 |
%% The amssymb package provides various useful mathematical symbols
|
alpar@0
|
41 |
\usepackage{amssymb}
|
alpar@0
|
42 |
%% The amsthm package provides extended theorem environments
|
alpar@0
|
43 |
%% \usepackage{amsthm}
|
alpar@0
|
44 |
|
alpar@0
|
45 |
%% The lineno packages adds line numbers. Start line numbering with
|
alpar@0
|
46 |
%% \begin{linenumbers}, end it with \end{linenumbers}. Or switch it on
|
alpar@0
|
47 |
%% for the whole article with \linenumbers.
|
alpar@0
|
48 |
%% \usepackage{lineno}
|
alpar@0
|
49 |
|
alpar@2
|
50 |
\usepackage{amsmath}
|
alpar@2
|
51 |
%% \usepackage[pdftex]{graphicx}
|
alpar@2
|
52 |
|
alpar@2
|
53 |
\usepackage{pgfplots}
|
alpar@2
|
54 |
\pgfplotsset{width=9cm}
|
alpar@2
|
55 |
\pgfplotsset{compat=1.8}
|
alpar@2
|
56 |
|
alpar@2
|
57 |
\usepackage{caption}
|
alpar@2
|
58 |
\usepackage{subcaption}
|
alpar@2
|
59 |
|
alpar@2
|
60 |
\usepackage{algorithm}
|
alpar@2
|
61 |
\usepackage{algpseudocode}
|
alpar@2
|
62 |
\usepackage{tikz}
|
alpar@2
|
63 |
|
alpar@2
|
64 |
\usepackage{amsthm,amssymb}
|
alpar@2
|
65 |
\renewcommand{\qedsymbol}{\rule{0.7em}{0.7em}}
|
alpar@2
|
66 |
|
alpar@2
|
67 |
\newtheorem{theorem}{Theorem}[subsection]
|
alpar@2
|
68 |
\newtheorem{corollary}{Corollary}[theorem]
|
alpar@2
|
69 |
\newtheorem{claim}[theorem]{Claim}
|
alpar@2
|
70 |
|
alpar@2
|
71 |
\newtheorem{definition}{Definition}[subsection]
|
alpar@2
|
72 |
\newtheorem{notation}{Notation}[subsection]
|
alpar@2
|
73 |
\newtheorem{example}{Example}[subsection]
|
alpar@2
|
74 |
\usetikzlibrary{decorations.markings}
|
alpar@2
|
75 |
\let\oldproofname=\proofname
|
alpar@2
|
76 |
%% \renewcommand{\proofname}{\rm\bf{Proof:}}
|
alpar@2
|
77 |
|
Madarasi@7
|
78 |
\captionsetup{font=normalsize}
|
Madarasi@7
|
79 |
|
alpar@1
|
80 |
\journal{Discrete Applied Mathematics}
|
alpar@0
|
81 |
|
alpar@0
|
82 |
\begin{document}
|
alpar@0
|
83 |
|
alpar@0
|
84 |
\begin{frontmatter}
|
alpar@0
|
85 |
|
alpar@0
|
86 |
%% Title, authors and addresses
|
alpar@0
|
87 |
|
alpar@0
|
88 |
%% use the tnoteref command within \title for footnotes;
|
alpar@0
|
89 |
%% use the tnotetext command for theassociated footnote;
|
alpar@0
|
90 |
%% use the fnref command within \author or \address for footnotes;
|
alpar@0
|
91 |
%% use the fntext command for theassociated footnote;
|
alpar@0
|
92 |
%% use the corref command within \author for corresponding author footnotes;
|
alpar@0
|
93 |
%% use the cortext command for theassociated footnote;
|
alpar@0
|
94 |
%% use the ead command for the email address,
|
alpar@0
|
95 |
%% and the form \ead[url] for the home page:
|
alpar@0
|
96 |
%% \title{Title\tnoteref{label1}}
|
alpar@0
|
97 |
%% \tnotetext[label1]{}
|
alpar@0
|
98 |
%% \author{Name\corref{cor1}\fnref{label2}}
|
alpar@0
|
99 |
%% \ead{email address}
|
alpar@0
|
100 |
%% \ead[url]{home page}
|
alpar@0
|
101 |
%% \fntext[label2]{}
|
alpar@0
|
102 |
%% \cortext[cor1]{}
|
alpar@0
|
103 |
%% \address{Address\fnref{label3}}
|
alpar@0
|
104 |
%% \fntext[label3]{}
|
alpar@0
|
105 |
|
alpar@1
|
106 |
\title{Improved Algorithms for Matching Biological Graphs}
|
alpar@0
|
107 |
|
alpar@0
|
108 |
%% use optional labels to link authors explicitly to addresses:
|
alpar@0
|
109 |
%% \author[label1,label2]{}
|
alpar@0
|
110 |
%% \address[label1]{}
|
alpar@0
|
111 |
%% \address[label2]{}
|
alpar@0
|
112 |
|
alpar@1
|
113 |
\author{Alp{\'a}r J{\"u}ttner and P{\'e}ter Madarasi}
|
alpar@0
|
114 |
|
alpar@1
|
115 |
\address{Dept of Operations Research, ELTE}
|
alpar@0
|
116 |
|
alpar@0
|
117 |
\begin{abstract}
|
alpar@1
|
118 |
Subgraph isomorphism is a well-known NP-Complete problem, while its
|
alpar@1
|
119 |
special case, the graph isomorphism problem is one of the few problems
|
alpar@1
|
120 |
in NP neither known to be in P nor NP-Complete. Their appearance in
|
alpar@1
|
121 |
many fields of application such as pattern analysis, computer vision
|
alpar@1
|
122 |
questions and the analysis of chemical and biological systems has
|
alpar@1
|
123 |
fostered the design of various algorithms for handling special graph
|
alpar@1
|
124 |
structures.
|
alpar@0
|
125 |
|
alpar@1
|
126 |
The idea of using state space representation and checking some
|
alpar@1
|
127 |
conditions in each state to prune the search tree has made the VF2
|
alpar@1
|
128 |
algorithm one of the state of the art graph matching algorithms for
|
alpar@1
|
129 |
more than a decade. Recently, biological questions of ever increasing
|
alpar@1
|
130 |
importance have required more efficient, specialized algorithms.
|
alpar@1
|
131 |
|
alpar@1
|
132 |
This paper presents VF2++, a new algorithm based on the original VF2,
|
alpar@1
|
133 |
which runs significantly faster on most test cases and performs
|
alpar@1
|
134 |
especially well on special graph classes stemming from biological
|
alpar@1
|
135 |
questions. VF2++ handles graphs of thousands of nodes in practically
|
alpar@1
|
136 |
near linear time including preprocessing. Not only is it an improved
|
alpar@1
|
137 |
version of VF2, but in fact, it is by far the fastest existing
|
alpar@1
|
138 |
algorithm regarding biological graphs.
|
alpar@1
|
139 |
|
alpar@1
|
140 |
The reason for VF2++' superiority over VF2 is twofold. Firstly, taking
|
alpar@1
|
141 |
into account the structure and the node labeling of the graph, VF2++
|
alpar@1
|
142 |
determines a state order in which most of the unfruitful branches of
|
alpar@1
|
143 |
the search space can be pruned immediately. Secondly, introducing more
|
alpar@1
|
144 |
efficient - nevertheless still easier to compute - cutting rules
|
alpar@1
|
145 |
reduces the chance of going astray even further.
|
alpar@1
|
146 |
|
alpar@1
|
147 |
In addition to the usual subgraph isomorphism, specialized versions
|
alpar@1
|
148 |
for induced subgraph isomorphism and for graph isomorphism are
|
alpar@1
|
149 |
presented. VF2++ has gained a runtime improvement of one order of
|
alpar@1
|
150 |
magnitude respecting induced subgraph isomorphism and a better
|
alpar@1
|
151 |
asymptotical behaviour in the case of graph isomorphism problem.
|
alpar@1
|
152 |
|
alpar@1
|
153 |
After having provided the description of VF2++, in order to evaluate
|
alpar@1
|
154 |
its effectiveness, an extensive comparison to the contemporary other
|
alpar@1
|
155 |
algorithms is shown, using a wide range of inputs, including both real
|
alpar@1
|
156 |
life biological and chemical datasets and standard randomly generated
|
alpar@1
|
157 |
graph series.
|
alpar@1
|
158 |
|
alpar@1
|
159 |
The work was motivated and sponsored by QuantumBio Inc., and all the
|
alpar@1
|
160 |
developed algorithms are available as the part of the open source
|
alpar@1
|
161 |
LEMON graph and network optimization library
|
alpar@1
|
162 |
(http://lemon.cs.elte.hu).
|
alpar@0
|
163 |
\end{abstract}
|
alpar@0
|
164 |
|
alpar@0
|
165 |
\begin{keyword}
|
alpar@0
|
166 |
%% keywords here, in the form: keyword \sep keyword
|
alpar@0
|
167 |
|
alpar@0
|
168 |
%% PACS codes here, in the form: \PACS code \sep code
|
alpar@0
|
169 |
|
alpar@0
|
170 |
%% MSC codes here, in the form: \MSC code \sep code
|
alpar@0
|
171 |
%% or \MSC[2008] code \sep code (2000 is the default)
|
alpar@0
|
172 |
|
alpar@0
|
173 |
\end{keyword}
|
alpar@0
|
174 |
|
alpar@0
|
175 |
\end{frontmatter}
|
alpar@0
|
176 |
|
alpar@0
|
177 |
%% \linenumbers
|
alpar@0
|
178 |
|
alpar@0
|
179 |
%% main text
|
alpar@2
|
180 |
\section{Introduction}
|
alpar@2
|
181 |
\label{sec:intro}
|
alpar@2
|
182 |
|
alpar@3
|
183 |
In the last decades, combinatorial structures, and especially graphs
|
alpar@3
|
184 |
have been considered with ever increasing interest, and applied to the
|
alpar@3
|
185 |
solution of several new and revised questions. The expressiveness,
|
alpar@3
|
186 |
the simplicity and the studiedness of graphs make them practical for
|
alpar@3
|
187 |
modelling and appear constantly in several seemingly independent
|
alpar@3
|
188 |
fields. Bioinformatics and chemistry are amongst the most relevant
|
alpar@3
|
189 |
and most important fields.
|
alpar@2
|
190 |
|
alpar@3
|
191 |
Complex biological systems arise from the interaction and cooperation
|
alpar@3
|
192 |
of plenty of molecular components. Getting acquainted with such
|
alpar@3
|
193 |
systems at the molecular level has primary importance, since
|
alpar@3
|
194 |
protein-protein interaction, DNA-protein interaction, metabolic
|
alpar@3
|
195 |
interaction, transcription factor binding, neuronal networks, and
|
alpar@3
|
196 |
hormone signaling networks can be understood only this way.
|
alpar@2
|
197 |
|
alpar@3
|
198 |
For instance, a molecular structure can be considered as a graph,
|
alpar@3
|
199 |
whose nodes correspond to atoms and whose edges to chemical bonds. The
|
alpar@3
|
200 |
secondary structure of a protein can also be represented as a graph,
|
alpar@3
|
201 |
where nodes are associated with aminoacids and the edges with hydrogen
|
alpar@3
|
202 |
bonds. The nodes are often whole molecular components and the edges
|
alpar@3
|
203 |
represent some relationships among them. The similarity and
|
alpar@3
|
204 |
dissimilarity of objects corresponding to nodes are incorporated to
|
alpar@3
|
205 |
the model by \emph{node labels}. Many other chemical and biological
|
alpar@3
|
206 |
structures can easily be modeled in a similar way. Understanding such
|
alpar@3
|
207 |
networks basically requires finding specific subgraphs, which can not
|
alpar@3
|
208 |
avoid the application of graph matching algorithms.
|
alpar@2
|
209 |
|
alpar@3
|
210 |
Finally, let some of the other real-world fields related to some
|
alpar@3
|
211 |
variants of graph matching be briefly mentioned: pattern recognition
|
alpar@3
|
212 |
and machine vision \cite{HorstBunkeApplications}, symbol recognition
|
alpar@3
|
213 |
\cite{CordellaVentoSymbolRecognition}, face identification
|
alpar@3
|
214 |
\cite{JianzhuangYongFaceIdentification}. \\
|
alpar@2
|
215 |
|
alpar@3
|
216 |
Subgraph and induced subgraph matching problems are known to be
|
alpar@3
|
217 |
NP-Complete\cite{SubgraphNPC}, while the graph isomorphism problem is
|
alpar@3
|
218 |
one of the few problems in NP neither known to be in P nor
|
alpar@3
|
219 |
NP-Complete. Although polynomial time isomorphism algorithms are known
|
alpar@3
|
220 |
for various graph classes, like trees and planar
|
alpar@3
|
221 |
graphs\cite{PlanarGraphIso}, bounded valence
|
alpar@3
|
222 |
graphs\cite{BondedDegGraphIso}, interval graphs\cite{IntervalGraphIso}
|
alpar@3
|
223 |
or permutation graphs\cite{PermGraphIso}.
|
alpar@2
|
224 |
|
alpar@3
|
225 |
In the following, some algorithms based on other approaches are
|
alpar@3
|
226 |
summarized, which do not need any restrictions on the graphs. However,
|
alpar@3
|
227 |
an overall polynomial behaviour is not expectable from such an
|
alpar@3
|
228 |
alternative, it may often have good performance, even on a graph class
|
alpar@3
|
229 |
for which polynomial algorithm is known. Note that this summary
|
alpar@3
|
230 |
containing only exact matching algorithms is far not complete, neither
|
alpar@3
|
231 |
does it cover all the recent algorithms.
|
alpar@2
|
232 |
|
alpar@3
|
233 |
The first practically usable approach was due to
|
alpar@4
|
234 |
Ullmann\cite{Ullmann} which is a commonly used depth-first
|
alpar@3
|
235 |
search based algorithm with a complex heuristic for reducing the
|
alpar@3
|
236 |
number of visited states. A major problem is its $\Theta(n^3)$ space
|
alpar@3
|
237 |
complexity, which makes it impractical in the case of big sparse
|
alpar@3
|
238 |
graphs.
|
alpar@2
|
239 |
|
alpar@4
|
240 |
In a recent paper, Ullmann\cite{UllmannBit} presents an
|
alpar@3
|
241 |
improved version of this algorithm based on a bit-vector solution for
|
alpar@3
|
242 |
the binary Constraint Satisfaction Problem.
|
alpar@2
|
243 |
|
alpar@4
|
244 |
The Nauty algorithm\cite{Nauty} transforms the two graphs to
|
alpar@3
|
245 |
a canonical form before starting to check for the isomorphism. It has
|
alpar@3
|
246 |
been considered as one of the fastest graph isomorphism algorithms,
|
alpar@3
|
247 |
although graph categories were shown in which it takes exponentially
|
alpar@3
|
248 |
many steps. This algorithm handles only the graph isomorphism problem.
|
alpar@2
|
249 |
|
alpar@4
|
250 |
The \emph{LAD} algorithm\cite{Lad} uses a depth-first search
|
alpar@3
|
251 |
strategy and formulates the matching as a Constraint Satisfaction
|
alpar@3
|
252 |
Problem to prune the search tree. The constraints are that the mapping
|
alpar@3
|
253 |
has to be injective and edge-preserving, hence it is possible to
|
alpar@3
|
254 |
handle new matching types as well.
|
alpar@2
|
255 |
|
alpar@3
|
256 |
The \textbf{RI} algorithm\cite{RI} and its variations are based on a
|
alpar@3
|
257 |
state space representation. After reordering the nodes of the graphs,
|
alpar@3
|
258 |
it uses some fast executable heuristic checks without using any
|
alpar@3
|
259 |
complex pruning rules. It seems to run really efficiently on graphs
|
alpar@3
|
260 |
coming from biology, and won the International Contest on Pattern
|
alpar@3
|
261 |
Search in Biological Databases\cite{Content}.
|
alpar@2
|
262 |
|
alpar@3
|
263 |
The currently most commonly used algorithm is the
|
alpar@3
|
264 |
\textbf{VF2}\cite{VF2}, the improved version of VF\cite{VF}, which was
|
alpar@3
|
265 |
designed for solving pattern matching and computer vision problems,
|
alpar@3
|
266 |
and has been one of the best overall algorithms for more than a
|
alpar@3
|
267 |
decade. Although, it can't be up to new specialized algorithms, it is
|
alpar@3
|
268 |
still widely used due to its simplicity and space efficiency. VF2 uses
|
alpar@3
|
269 |
a state space representation and checks some conditions in each state
|
alpar@3
|
270 |
to prune the search tree.
|
alpar@2
|
271 |
|
alpar@3
|
272 |
Our first graph matching algorithm was the first version of VF2 which
|
alpar@3
|
273 |
recognizes the significance of the node ordering, more opportunities
|
alpar@3
|
274 |
to increase the cutting efficiency and reduce its computational
|
alpar@3
|
275 |
complexity. This project was initiated and sponsored by QuantumBio
|
alpar@3
|
276 |
Inc.\cite{QUANTUMBIO} and the implementation --- along with a source
|
alpar@3
|
277 |
code --- has been published as a part of LEMON\cite{LEMON} open source
|
alpar@3
|
278 |
graph library.
|
alpar@2
|
279 |
|
alpar@3
|
280 |
This paper introduces \textbf{VF2++}, a new further improved algorithm
|
alpar@3
|
281 |
for the graph and (induced)subgraph isomorphism problem, which uses
|
alpar@3
|
282 |
efficient cutting rules and determines a node order in which VF2 runs
|
alpar@3
|
283 |
significantly faster on practical inputs.
|
alpar@2
|
284 |
|
alpar@3
|
285 |
Meanwhile, another variant called \textbf{VF2 Plus}\cite{VF2Plus} has
|
alpar@3
|
286 |
been published. It is considered to be as efficient as the RI
|
alpar@3
|
287 |
algorithm and has a strictly better behavior on large graphs. The
|
alpar@3
|
288 |
main idea of VF2 Plus is to precompute a heuristic node order of the
|
alpar@3
|
289 |
small graph, in which the VF2 works more efficiently.
|
alpar@2
|
290 |
|
alpar@2
|
291 |
\section{Problem Statement}
|
alpar@3
|
292 |
This section provides a detailed description of the problems to be
|
alpar@3
|
293 |
solved.
|
alpar@2
|
294 |
\subsection{Definitions}
|
alpar@2
|
295 |
|
alpar@3
|
296 |
Throughout the paper $G_{small}=(V_{small}, E_{small})$ and
|
alpar@3
|
297 |
$G_{large}=(V_{large}, E_{large})$ denote two undirected graphs.
|
alpar@2
|
298 |
\begin{definition}\label{sec:ismorphic}
|
alpar@3
|
299 |
$G_{small}$ and $G_{large}$ are \textbf{isomorphic} if $\exists M:
|
alpar@3
|
300 |
V_{small} \longrightarrow V_{large}$ bijection, for which the
|
alpar@3
|
301 |
following is true:
|
alpar@2
|
302 |
\begin{center}
|
alpar@3
|
303 |
$\forall u,v\in{V_{small}} : (u,v)\in{E_{small}} \Leftrightarrow
|
alpar@3
|
304 |
(M(u),M(v))\in{E_{large}}$
|
alpar@2
|
305 |
\end{center}
|
alpar@2
|
306 |
\end{definition}
|
alpar@3
|
307 |
For the sake of simplicity in this paper subgraphs and induced
|
alpar@3
|
308 |
subgraphs are defined in a more general way than usual:
|
alpar@2
|
309 |
\begin{definition}
|
alpar@3
|
310 |
$G_{small}$ is a \textbf{subgraph} of $G_{large}$ if $\exists I:
|
alpar@3
|
311 |
V_{small}\longrightarrow V_{large}$ injection, for which the
|
alpar@3
|
312 |
following is true:
|
alpar@2
|
313 |
\begin{center}
|
alpar@2
|
314 |
$\forall u,v \in{V_{small}} : (u,v)\in{E_{small}} \Rightarrow (I(u),I(v))\in E_{large}$
|
alpar@2
|
315 |
\end{center}
|
alpar@2
|
316 |
\end{definition}
|
alpar@2
|
317 |
|
alpar@2
|
318 |
\begin{definition}
|
alpar@3
|
319 |
$G_{small}$ is an \textbf{induced subgraph} of $G_{large}$ if $\exists
|
alpar@3
|
320 |
I: V_{small}\longrightarrow V_{large}$ injection, for which the
|
alpar@3
|
321 |
following is true:
|
alpar@2
|
322 |
\begin{center}
|
alpar@3
|
323 |
$\forall u,v \in{V_{small}} : (u,v)\in{E_{small}} \Leftrightarrow
|
alpar@3
|
324 |
(I(u),I(v))\in E_{large}$
|
alpar@2
|
325 |
\end{center}
|
alpar@2
|
326 |
\end{definition}
|
alpar@2
|
327 |
|
alpar@2
|
328 |
\begin{definition}
|
alpar@3
|
329 |
$lab: (V_{small}\cup V_{large}) \longrightarrow K$ is a \textbf{node
|
alpar@3
|
330 |
label function}, where K is an arbitrary set. The elements in K
|
alpar@3
|
331 |
are the \textbf{node labels}. Two nodes, u and v are said to be
|
alpar@3
|
332 |
\textbf{equivalent}, if $lab(u)=lab(v)$.
|
alpar@2
|
333 |
\end{definition}
|
alpar@2
|
334 |
|
alpar@3
|
335 |
When node labels are also given, the matched nodes must have the same
|
alpar@3
|
336 |
labels. For example, the node labeled isomorphism is phrased by
|
alpar@2
|
337 |
\begin{definition}
|
alpar@3
|
338 |
$G_{small}$ and $G_{large}$ are \textbf{isomorphic by the node label
|
alpar@3
|
339 |
function lab} if $\exists M: V_{small} \longrightarrow V_{large}$
|
alpar@3
|
340 |
bijection, for which the following is true:
|
alpar@2
|
341 |
\begin{center}
|
alpar@3
|
342 |
$(\forall u,v\in{V_{small}} : (u,v)\in{E_{small}} \Leftrightarrow
|
alpar@3
|
343 |
(M(u),M(v))\in{E_{large}})$ and $(\forall u\in{V_{small}} :
|
alpar@3
|
344 |
lab(u)=lab(M(u)))$
|
alpar@2
|
345 |
\end{center}
|
alpar@2
|
346 |
\end{definition}
|
alpar@2
|
347 |
|
alpar@2
|
348 |
The other two definitions can be extended in the same way.
|
alpar@2
|
349 |
|
alpar@3
|
350 |
Note that edge label function can be defined similarly to node label
|
alpar@3
|
351 |
function, and all the definitions can be extended with additional
|
alpar@3
|
352 |
conditions, but it is out of the scope of this work.
|
alpar@2
|
353 |
|
alpar@3
|
354 |
The equivalence of two nodes is usually defined by another relation,
|
alpar@3
|
355 |
$\\R\subseteq (V_{small}\cup V_{large})^2$. This overlaps with the
|
alpar@3
|
356 |
definition given above if R is an equivalence relation, which does not
|
alpar@3
|
357 |
mean restriction in biological and chemical applications.
|
alpar@2
|
358 |
|
alpar@2
|
359 |
\subsection{Common problems}\label{sec:CommProb}
|
alpar@2
|
360 |
|
alpar@3
|
361 |
The focus of this paper is on two extensively studied topics, the
|
alpar@3
|
362 |
subgraph isomorphism and its variations. However, the following
|
alpar@3
|
363 |
problems also appear in many applications.
|
alpar@2
|
364 |
|
alpar@3
|
365 |
The \textbf{subgraph matching problem} is the following: is
|
alpar@3
|
366 |
$G_{small}$ isomorphic to any subgraph of $G_{large}$ by a given node
|
alpar@3
|
367 |
label?
|
alpar@2
|
368 |
|
alpar@3
|
369 |
The \textbf{induced subgraph matching problem} asks the same about the
|
alpar@3
|
370 |
existence of an induced subgraph.
|
alpar@2
|
371 |
|
alpar@3
|
372 |
The \textbf{graph isomorphism problem} can be defined as induced
|
alpar@3
|
373 |
subgraph matching problem where the sizes of the two graphs are equal.
|
alpar@2
|
374 |
|
alpar@3
|
375 |
In addition to existence, it may be needed to show such a subgraph, or
|
alpar@3
|
376 |
it may be necessary to list all of them.
|
alpar@2
|
377 |
|
alpar@3
|
378 |
It should be noted that some authors misleadingly refer to the term
|
alpar@3
|
379 |
\emph{subgraph isomorphism problem} as an \emph{induced subgraph
|
alpar@3
|
380 |
isomorphism problem}.
|
alpar@2
|
381 |
|
alpar@3
|
382 |
The following sections give the descriptions of VF2, VF2++, VF2 Plus
|
alpar@3
|
383 |
and a particular comparison.
|
alpar@2
|
384 |
|
alpar@2
|
385 |
\section{The VF2 Algorithm}
|
alpar@3
|
386 |
This algorithm is the basis of both the VF2++ and the VF2 Plus. VF2
|
alpar@4
|
387 |
is able to handle all the variations mentioned in Section
|
alpar@4
|
388 |
\ref{sec:CommProb}. Although it can also handle directed graphs,
|
alpar@3
|
389 |
for the sake of simplicity, only the undirected case will be
|
alpar@3
|
390 |
discussed.
|
alpar@2
|
391 |
|
alpar@2
|
392 |
|
alpar@2
|
393 |
\subsection{Common notations}
|
alpar@3
|
394 |
\indent Assume $G_{small}$ is searched in $G_{large}$. The following
|
alpar@3
|
395 |
definitions and notations will be used throughout the whole paper.
|
alpar@2
|
396 |
\begin{definition}
|
alpar@3
|
397 |
A set $M\subseteq V_{small}\times V_{large}$ is called
|
alpar@3
|
398 |
\textbf{mapping}, if no node of $V_{small}$ or of $V_{large}$ appears
|
alpar@3
|
399 |
in more than one pair in M. That is, M uniquely associates some of
|
alpar@3
|
400 |
the nodes in $V_{small}$ with some nodes of $V_{large}$ and vice
|
alpar@3
|
401 |
versa.
|
alpar@2
|
402 |
\end{definition}
|
alpar@2
|
403 |
|
alpar@2
|
404 |
\begin{definition}
|
alpar@3
|
405 |
Mapping M \textbf{covers} a node v, if there exists a pair in M, which
|
alpar@3
|
406 |
contains v.
|
alpar@2
|
407 |
\end{definition}
|
alpar@2
|
408 |
|
alpar@2
|
409 |
\begin{definition}
|
alpar@3
|
410 |
A mapping $M$ is $\mathbf{whole\ mapping}$, if $M$ covers all the
|
alpar@3
|
411 |
nodes in $V_{small}$.
|
alpar@2
|
412 |
\end{definition}
|
alpar@2
|
413 |
|
alpar@2
|
414 |
\begin{notation}
|
alpar@3
|
415 |
Let $\mathbf{M_{small}(s)} := \{u\in V_{small} : \exists v\in
|
alpar@3
|
416 |
V_{large}: (u,v)\in M(s)\}$ and $\mathbf{M_{large}(s)} := \{v\in
|
alpar@3
|
417 |
V_{large} : \exists u\in V_{small}: (u,v)\in M(s)\}$.
|
alpar@2
|
418 |
\end{notation}
|
alpar@2
|
419 |
|
alpar@2
|
420 |
\begin{notation}
|
alpar@3
|
421 |
Let $\mathbf{Pair(M,v)}$ be the pair of $v$ in $M$, if such a node
|
alpar@3
|
422 |
exist, otherwise $\mathbf{Pair(M,v)}$ is undefined. For a mapping $M$
|
alpar@3
|
423 |
and $v\in V_{small}\cup V_{large}$.
|
alpar@2
|
424 |
\end{notation}
|
alpar@2
|
425 |
|
alpar@2
|
426 |
Note that if $\mathbf{Pair(M,v)}$ exists, then it is unique
|
alpar@2
|
427 |
|
alpar@3
|
428 |
The definitions of the isomorphism types can be rephrased on the
|
alpar@3
|
429 |
existence of a special whole mapping $M$, since it represents a
|
alpar@3
|
430 |
bijection. For example
|
alpar@2
|
431 |
\begin{center}
|
alpar@3
|
432 |
$M\subseteq V_{small}\times V_{large}$ represents an induced subgraph
|
alpar@3
|
433 |
isomorphism $\Leftrightarrow$ $M$ is whole mapping and $\forall u,v
|
alpar@3
|
434 |
\in{V_{small}} : (u,v)\in{E_{small}} \Leftrightarrow
|
alpar@3
|
435 |
(Pair(M,u),Pair(M,v))\in E_{large}$.
|
alpar@2
|
436 |
\end{center}
|
alpar@2
|
437 |
|
alpar@2
|
438 |
\begin{definition}
|
alpar@2
|
439 |
A set of whole mappings is called \textbf{problem type}.
|
alpar@2
|
440 |
\end{definition}
|
alpar@3
|
441 |
Throughout the paper, $\mathbf{PT}$ denotes a generic problem type
|
alpar@3
|
442 |
which can be substituted by any problem type.
|
alpar@2
|
443 |
|
alpar@3
|
444 |
A whole mapping $W\mathbf{\ is\ of\ type\ PT}$, if $W\in PT$. Using
|
alpar@3
|
445 |
this notations, VF2 searches a whole mapping $W$ of type $PT$.
|
alpar@2
|
446 |
|
alpar@3
|
447 |
For example the problem type of graph isomorphism problem is the
|
alpar@3
|
448 |
following. A whole mapping $W$ is in $\mathbf{ISO}$, iff the
|
alpar@4
|
449 |
bijection represented by $W$ satisfies Definition~\ref{sec:ismorphic}.
|
alpar@4
|
450 |
The subgraph- and induced subgraph matching problems can be formalized
|
alpar@4
|
451 |
in a similar way. Let their problem types be denoted as $\mathbf{SUB}$
|
alpar@4
|
452 |
and $\mathbf{IND}$.
|
alpar@2
|
453 |
|
alpar@2
|
454 |
\begin{definition}
|
alpar@2
|
455 |
\label{expPT}
|
alpar@3
|
456 |
$PT$ is an \textbf{expanding problem type} if $\ \forall\ W\in
|
alpar@3
|
457 |
PT:\ \forall u_1,u_2\in V_{small}:\ (u_1,u_2)\in E_{small}\Rightarrow
|
alpar@3
|
458 |
(Pair(W,u_1),Pair(W,u_2))\in E_{large}$, that is each edge of
|
alpar@3
|
459 |
$G_{small}$ has to be mapped to an edge of $G_{large}$ for each
|
alpar@3
|
460 |
mapping in $PT$.
|
alpar@2
|
461 |
\end{definition}
|
alpar@2
|
462 |
|
alpar@2
|
463 |
Note that $ISO$, $SUB$ and $IND$ are expanding problem types.
|
alpar@2
|
464 |
|
alpar@3
|
465 |
This paper deals with the three problem types mentioned above only,
|
alpar@3
|
466 |
but the following generic definitions make it possible to handle other
|
alpar@3
|
467 |
types as well. Although it may be challenging to find a proper
|
alpar@3
|
468 |
consistency function and an efficient cutting function.
|
alpar@2
|
469 |
|
alpar@2
|
470 |
\begin{definition}
|
alpar@3
|
471 |
Let M be a mapping. A logical function $\mathbf{Cons_{PT}}$ is a
|
alpar@3
|
472 |
\textbf{consistency function by } $\mathbf{PT}$, if the following
|
alpar@3
|
473 |
holds. If there exists whole mapping $W$ of type PT for which
|
alpar@3
|
474 |
$M\subseteq W$, then $Cons_{PT}(M)$ is true.
|
alpar@2
|
475 |
\end{definition}
|
alpar@2
|
476 |
|
alpar@2
|
477 |
\begin{definition}
|
alpar@3
|
478 |
Let M be a mapping. A logical function $\mathbf{Cut_{PT}}$ is a
|
alpar@3
|
479 |
\textbf{cutting function by } $\mathbf{PT}$, if the following
|
alpar@3
|
480 |
holds. $\mathbf{Cut_{PT}(M)}$ is false if $M$ can be extended to a
|
alpar@3
|
481 |
whole mapping W of type PT.
|
alpar@2
|
482 |
\end{definition}
|
alpar@2
|
483 |
|
alpar@2
|
484 |
\begin{definition}
|
alpar@3
|
485 |
$M$ is said to be \textbf{consistent mapping by} $\mathbf{PT}$, if
|
alpar@3
|
486 |
$Cons_{PT}(M)$ is true.
|
alpar@2
|
487 |
\end{definition}
|
alpar@2
|
488 |
|
alpar@2
|
489 |
$Cons_{PT}$ and $Cut_{PT}$ will often be used in the following form.
|
alpar@2
|
490 |
\begin{notation}
|
alpar@3
|
491 |
Let $\mathbf{Cons_{PT}(p, M)}:=Cons_{PT}(M\cup\{p\})$ and
|
alpar@3
|
492 |
$\mathbf{Cut_{PT}(p, M)}:=Cut_{PT}(M\cup\{p\})$, where
|
alpar@3
|
493 |
$p\in{V_{small}\!\times\!V_{large}}$ and $M\cup\{p\}$ is mapping.
|
alpar@2
|
494 |
\end{notation}
|
alpar@2
|
495 |
|
alpar@3
|
496 |
$Cons_{PT}$ will be used to check the consistency of the already
|
alpar@3
|
497 |
covered nodes, while $Cut_{PT}$ is for looking ahead to recognize if
|
alpar@3
|
498 |
no whole consistent mapping can contain the current mapping.
|
alpar@2
|
499 |
|
alpar@2
|
500 |
\subsection{Overview of the algorithm}
|
alpar@3
|
501 |
VF2 uses a state space representation of mappings, $Cons_{PT}$ for
|
alpar@3
|
502 |
excluding inconsistency with the problem type and $Cut_{PT}$ for
|
alpar@3
|
503 |
pruning the search tree. Each state $s$ of the matching process can
|
alpar@3
|
504 |
be associated with a mapping $M(s)$.
|
alpar@2
|
505 |
|
alpar@4
|
506 |
Algorithm~\ref{alg:VF2Pseu} is a high level description of
|
alpar@3
|
507 |
the VF2 matching algorithm.
|
alpar@2
|
508 |
|
alpar@2
|
509 |
|
alpar@2
|
510 |
\begin{algorithm}
|
alpar@3
|
511 |
\algtext*{EndIf}%ne nyomtasson end if-et \algtext*{EndFor}%ne
|
alpar@3
|
512 |
nyomtasson .. \algtext*{EndProcedure}%ne nyomtasson ..
|
alpar@2
|
513 |
\caption{\hspace{0.5cm}$A\ high\ level\ description\ of\ VF2$}\label{alg:VF2Pseu}
|
alpar@2
|
514 |
\begin{algorithmic}[1]
|
alpar@2
|
515 |
|
alpar@3
|
516 |
\Procedure{VF2}{State $s$, ProblemType $PT$} \If{$M(s$) covers
|
alpar@3
|
517 |
$V_{small}$} \State Output($M(s)$) \Else
|
alpar@2
|
518 |
|
alpar@3
|
519 |
\State Compute the set $P(s)$ of the pairs candidate for inclusion
|
alpar@3
|
520 |
in $M(s)$ \ForAll{$p\in{P(s)}$} \If{Cons$_{PT}$($p, M(s)$) $\wedge$
|
alpar@3
|
521 |
$\neg$Cut$_{PT}$($p, M(s)$)} \State Compute the nascent state
|
alpar@3
|
522 |
$\tilde{s}$ by adding $p$ to $M(s)$ \State \textbf{call}
|
alpar@3
|
523 |
VF2($\tilde{s}$, $PT$) \EndIf \EndFor \EndIf \EndProcedure
|
alpar@2
|
524 |
\end{algorithmic}
|
alpar@2
|
525 |
\end{algorithm}
|
alpar@2
|
526 |
|
alpar@2
|
527 |
|
alpar@3
|
528 |
The initial state $s_0$ is associated with $M(s_0)=\emptyset$, i.e. it
|
alpar@3
|
529 |
starts with an empty mapping.
|
alpar@2
|
530 |
|
alpar@3
|
531 |
For each state $s$, the algorithm computes $P(s)$, the set of
|
alpar@3
|
532 |
candidate node pairs for adding to the current state $s$.
|
alpar@2
|
533 |
|
alpar@3
|
534 |
For each pair $p$ in $P(s)$, $Cons_{PT}(p,M(s))$ and
|
alpar@3
|
535 |
$Cut_{PT}(p,M(s))$ are evaluated. If $Cons_{PT}(p,M(s))$ is true and
|
alpar@3
|
536 |
$Cut_{PT}(p,M(s))$ is false, the successor state $\tilde{s}=s\cup
|
alpar@3
|
537 |
\{p\}$ is computed, and the whole process is recursively applied to
|
alpar@3
|
538 |
$\tilde{s}$. Otherwise, $\tilde{s}$ is not consistent by $PT$ or it
|
alpar@3
|
539 |
can be proved that $s$ can not be extended to a whole mapping.
|
alpar@2
|
540 |
|
alpar@2
|
541 |
In order to make sure of the correctness see
|
alpar@2
|
542 |
\begin{claim}
|
alpar@3
|
543 |
Through consistent mappings, only consistent whole mappings can be
|
alpar@3
|
544 |
reached, and all of the whole mappings are reachable through
|
alpar@3
|
545 |
consistent mappings.
|
alpar@2
|
546 |
\end{claim}
|
alpar@2
|
547 |
|
alpar@3
|
548 |
Note that a state may be reached in many different ways, since the
|
alpar@3
|
549 |
order of insertions into M does not influence the nascent mapping. In
|
alpar@3
|
550 |
fact, the number of different ways which lead to the same state can be
|
alpar@3
|
551 |
exponentially large. If $G_{small}$ and $G_{large}$ are circles with n
|
alpar@3
|
552 |
nodes and n different node labels, there exists exactly one graph
|
alpar@3
|
553 |
isomorphism between them, but it will be reached in $n!$ different
|
alpar@3
|
554 |
ways.
|
alpar@2
|
555 |
|
alpar@2
|
556 |
However, one may observe
|
alpar@2
|
557 |
|
alpar@2
|
558 |
\begin{claim}
|
alpar@2
|
559 |
\label{claim:claimTotOrd}
|
alpar@3
|
560 |
Let $\prec$ an arbitrary total ordering relation on $V_{small}$. If
|
alpar@3
|
561 |
the algorithm ignores each $p=(u,v) \in P(s)$, for which
|
alpar@2
|
562 |
\begin{center}
|
alpar@2
|
563 |
$\exists (\hat{u},\hat{v})\in P(s): \hat{u} \prec u$,
|
alpar@2
|
564 |
\end{center}
|
alpar@3
|
565 |
then no state can be reached more than ones and each state associated
|
alpar@3
|
566 |
with a whole mapping remains reachable.
|
alpar@2
|
567 |
\end{claim}
|
alpar@2
|
568 |
|
alpar@3
|
569 |
Note that the cornerstone of the improvements to VF2 is a proper
|
alpar@3
|
570 |
choice of a total ordering.
|
alpar@2
|
571 |
|
alpar@2
|
572 |
\subsection{The candidate set P(s)}
|
alpar@2
|
573 |
\label{candidateComputingVF2}
|
alpar@2
|
574 |
$P(s)$ is the set of the candidate pairs for inclusion in $M(s)$.
|
alpar@3
|
575 |
Suppose that $PT$ is an expanding problem type, see
|
alpar@4
|
576 |
Definition~\ref{expPT}.
|
alpar@2
|
577 |
|
alpar@2
|
578 |
\begin{notation}
|
alpar@3
|
579 |
Let $\mathbf{T_{small}(s)}:=\{u \in V_{small} : u$ is not covered by
|
alpar@3
|
580 |
$M(s)\wedge\exists \tilde{u}\in{V_{small}: (u,\tilde{u})\in E_{small}}
|
alpar@3
|
581 |
\wedge \tilde{u}$ is covered by $M(s)\}$, and
|
alpar@3
|
582 |
\\ $\mathbf{T_{large}(s)}\!:=\!\{v \in\!V_{large}\!:\!v$ is not
|
alpar@3
|
583 |
covered by
|
alpar@3
|
584 |
$M(s)\wedge\!\exists\tilde{v}\!\in\!{V_{large}\!:\!(v,\tilde{v})\in\!E_{large}}
|
alpar@3
|
585 |
\wedge \tilde{v}$ is covered by $M(s)\}$
|
alpar@2
|
586 |
\end{notation}
|
alpar@2
|
587 |
|
alpar@3
|
588 |
The set $P(s)$ includes the pairs of uncovered neighbours of covered
|
alpar@3
|
589 |
nodes and if there is not such a node pair, all the pairs containing
|
alpar@3
|
590 |
two uncovered nodes are added. Formally, let
|
alpar@2
|
591 |
\[
|
alpar@2
|
592 |
P(s)\!=\!
|
alpar@2
|
593 |
\begin{cases}
|
alpar@3
|
594 |
T_{small}(s)\times T_{large}(s)&\hspace{-0.15cm}\text{if }
|
alpar@3
|
595 |
T_{small}(s)\!\neq\!\emptyset\!\wedge\!T_{large}(s)\!\neq
|
alpar@3
|
596 |
\emptyset,\\ (V_{small}\!\setminus\!M_{small}(s))\!\times\!(V_{large}\!\setminus\!M_{large}(s))
|
alpar@3
|
597 |
&\hspace{-0.15cm}otherwise.
|
alpar@2
|
598 |
\end{cases}
|
alpar@2
|
599 |
\]
|
alpar@2
|
600 |
|
alpar@2
|
601 |
\subsection{Consistency}
|
alpar@3
|
602 |
This section defines the consistency functions for the different
|
alpar@4
|
603 |
problem types mentioned in Section~\ref{sec:CommProb}.
|
alpar@2
|
604 |
\begin{notation}
|
alpar@3
|
605 |
Let $\mathbf{\Gamma_{small} (u)}:=\{\tilde{u}\in V_{small} :
|
alpar@3
|
606 |
(u,\tilde{u})\in E_{small}\}$\\ Let $\mathbf{\Gamma_{large}
|
alpar@3
|
607 |
(v)}:=\{\tilde{v}\in V_{large} : (v,\tilde{v})\in E_{large}\}$
|
alpar@2
|
608 |
\end{notation}
|
alpar@3
|
609 |
Suppose $p=(u,v)$, where $u\in V_{small}$ and $v\in V_{large}$, $s$ is
|
alpar@3
|
610 |
a state of the matching procedure, $M(s)$ is consistent mapping by
|
alpar@3
|
611 |
$PT$ and $lab(u)=lab(v)$. $Cons_{PT}(p,M(s))$ checks whether
|
alpar@3
|
612 |
including pair $p$ into $M(s)$ leads to a consistent mapping by $PT$.
|
alpar@2
|
613 |
|
alpar@2
|
614 |
\subsubsection{Induced subgraph isomorphism}
|
alpar@3
|
615 |
$M(s)\cup \{(u,v)\}$ is a consistent mapping by $IND$ $\Leftrightarrow
|
alpar@3
|
616 |
(\forall \tilde{u}\in M_{small}: (u,\tilde{u})\in E_{small}
|
alpar@3
|
617 |
\Leftrightarrow (v,Pair(M(s),\tilde{u}))\in E_{large})$.\newline The
|
alpar@3
|
618 |
following formulation gives an efficient way of calculating
|
alpar@3
|
619 |
$Cons_{IND}$.
|
alpar@2
|
620 |
\begin{claim}
|
alpar@3
|
621 |
$Cons_{IND}((u,v),M(s)):=(\forall \tilde{v}\in \Gamma_{large}(v)
|
alpar@3
|
622 |
\ \cap\ M_{large}(s):\\(Pair(M(s),\tilde{v}),u)\in E_{small})\wedge
|
alpar@3
|
623 |
(\forall \tilde{u}\in \Gamma_{small}(u)
|
alpar@3
|
624 |
\ \cap\ M_{small}(s):(v,Pair(M(s),\tilde{u}))\in E_{large})$ is a
|
alpar@3
|
625 |
consistency function in the case of $IND$.
|
alpar@2
|
626 |
\end{claim}
|
alpar@2
|
627 |
|
alpar@2
|
628 |
\subsubsection{Graph isomorphism}
|
alpar@3
|
629 |
$M(s)\cup \{(u,v)\}$ is a consistent mapping by $ISO$
|
alpar@3
|
630 |
$\Leftrightarrow$ $M(s)\cup \{(u,v)\}$ is a consistent mapping by
|
alpar@3
|
631 |
$IND$.
|
alpar@2
|
632 |
\begin{claim}
|
alpar@3
|
633 |
$Cons_{ISO}((u,v),M(s))$ is a consistency function by $ISO$ if and
|
alpar@3
|
634 |
only if it is a consistency function by $IND$.
|
alpar@2
|
635 |
\end{claim}
|
alpar@2
|
636 |
\subsubsection{Subgraph isomorphism}
|
alpar@3
|
637 |
$M(s)\cup \{(u,v)\}$ is a consistent mapping by $SUB$ $\Leftrightarrow
|
alpar@3
|
638 |
(\forall \tilde{u}\in M_{small}:\\(u,\tilde{u})\in E_{small}
|
alpar@3
|
639 |
\Rightarrow (v,Pair(M(s),\tilde{u}))\in E_{large})$.
|
alpar@2
|
640 |
\newline
|
alpar@3
|
641 |
The following formulation gives an efficient way of calculating
|
alpar@3
|
642 |
$Cons_{SUB}$.
|
alpar@2
|
643 |
\begin{claim}
|
alpar@3
|
644 |
$Cons_{SUB}((u,v),M(s)):= (\forall \tilde{u}\in \Gamma_{small}(u)
|
alpar@3
|
645 |
\ \cap\ M_{small}(s):\\(v,Pair(M(s),\tilde{u}))\in E_{large})$ is a
|
alpar@3
|
646 |
consistency function by $SUB$.
|
alpar@2
|
647 |
\end{claim}
|
alpar@2
|
648 |
|
alpar@2
|
649 |
\subsection{Cutting rules}
|
alpar@3
|
650 |
$Cut_{PT}(p,M(s))$ is defined by a collection of efficiently
|
alpar@3
|
651 |
verifiable conditions. The requirement is that $Cut_{PT}(p,M(s))$ can
|
alpar@3
|
652 |
be true only if it is impossible to extended $M(s)\cup \{p\}$ to a
|
alpar@3
|
653 |
whole mapping.
|
alpar@2
|
654 |
\begin{notation}
|
alpar@2
|
655 |
|
alpar@3
|
656 |
Let $\mathbf{\tilde{T}_{small}}(s):=(V_{small}\backslash
|
alpar@3
|
657 |
M_{small}(s))\backslash T_{small}(s)$, and
|
alpar@3
|
658 |
\\ $\mathbf{\tilde{T}_{large}}(s):=(V_{large}\backslash
|
alpar@3
|
659 |
M_{large}(s))\backslash T_{large}(s)$.
|
alpar@2
|
660 |
\end{notation}
|
alpar@2
|
661 |
\subsubsection{Induced subgraph isomorphism}
|
alpar@2
|
662 |
\begin{claim}
|
alpar@3
|
663 |
$Cut_{IND}((u,v),M(s)):= |\Gamma_{large} (v)\ \cap\ T_{large}(s)| <
|
alpar@3
|
664 |
|\Gamma_{small} (u)\ \cap\ T_{small}(s)| \vee |\Gamma_{large}(v)\cap
|
alpar@3
|
665 |
\tilde{T}_{large}(s)| < |\Gamma_{small}(u)\cap
|
alpar@3
|
666 |
\tilde{T}_{small}(s)|$ is a cutting function by $IND$.
|
alpar@2
|
667 |
\end{claim}
|
alpar@2
|
668 |
\subsubsection{Graph isomorphism}
|
alpar@3
|
669 |
Note that the cutting function of induced subgraph isomorphism defined
|
alpar@3
|
670 |
above is a cutting function by $ISO$, too, however it is less
|
alpar@3
|
671 |
efficient than the following while their computational complexity is
|
alpar@3
|
672 |
the same.
|
alpar@2
|
673 |
\begin{claim}
|
alpar@3
|
674 |
$Cut_{ISO}((u,v),M(s)):= |\Gamma_{large} (v)\ \cap\ T_{large}(s)| \neq
|
alpar@3
|
675 |
|\Gamma_{small} (u)\ \cap\ T_{small}(s)| \vee |\Gamma_{large}(v)\cap
|
alpar@3
|
676 |
\tilde{T}_{large}(s)| \neq |\Gamma_{small}(u)\cap
|
alpar@3
|
677 |
\tilde{T}_{small}(s)|$ is a cutting function by $ISO$.
|
alpar@2
|
678 |
\end{claim}
|
alpar@2
|
679 |
|
alpar@2
|
680 |
\subsubsection{Subgraph isomorphism}
|
alpar@2
|
681 |
\begin{claim}
|
alpar@3
|
682 |
$Cut_{SUB}((u,v),M(s)):= |\Gamma_{large} (v)\ \cap\ T_{large}(s)| <
|
alpar@3
|
683 |
|\Gamma_{small} (u)\ \cap\ T_{small}(s)|$ is a cutting function by
|
alpar@3
|
684 |
$SUB$.
|
alpar@2
|
685 |
\end{claim}
|
alpar@3
|
686 |
Note that there is a significant difference between induced and
|
alpar@3
|
687 |
non-induced subgraph isomorphism:
|
alpar@2
|
688 |
|
alpar@2
|
689 |
\begin{claim}
|
alpar@2
|
690 |
\label{claimSUB}
|
alpar@3
|
691 |
$Cut_{SUB}'((u,v),M(s)):= |\Gamma_{large} (v)\ \cap\ T_{large}(s)| <
|
alpar@3
|
692 |
|\Gamma_{small} (u)\ \cap\ T_{small}(s)| \vee |\Gamma_{large}(v)\cap
|
alpar@3
|
693 |
\tilde{T}_{large}(s)| < |\Gamma_{small}(u)\cap \tilde{T}_{small}(s)|$
|
alpar@3
|
694 |
is \textbf{not} a cutting function by $SUB$.
|
alpar@2
|
695 |
\end{claim}
|
alpar@2
|
696 |
\begin{proof}$ $\\
|
alpar@2
|
697 |
\vspace*{-0.5cm}
|
alpar@2
|
698 |
|
alpar@2
|
699 |
\begin{figure}
|
alpar@2
|
700 |
\begin{center}
|
alpar@2
|
701 |
\begin{tikzpicture}
|
alpar@2
|
702 |
[scale=.8,auto=left,every node/.style={circle,fill=black!15}]
|
alpar@3
|
703 |
\node[rectangle,fill=black!15] at (4,6) {$G_{small}$}; \node (u4) at
|
alpar@3
|
704 |
(2.5,10) {$u_4$}; \node (u3) at (5.5,10) {$u_3$}; \node (u1) at
|
alpar@3
|
705 |
(2.5,7) {$u_1$}; \node (u2) at (5.5,7) {$u_2$};
|
alpar@2
|
706 |
|
alpar@2
|
707 |
\node[rectangle,fill=black!30] at (13.5,6) {$G_{large}$};
|
alpar@3
|
708 |
\node[fill=black!30] (v4) at (12,10) {$v_4$}; \node[fill=black!30]
|
alpar@3
|
709 |
(v3) at (15,10) {$v_3$}; \node[fill=black!30] (v1) at (12,7)
|
alpar@3
|
710 |
{$v_1$}; \node[fill=black!30] (v2) at (15,7) {$v_2$};
|
alpar@2
|
711 |
|
alpar@2
|
712 |
|
alpar@3
|
713 |
\foreach \from/\to in {u1/u2,u2/u3,u3/u4,u4/u1} \draw (\from) --
|
alpar@3
|
714 |
(\to); \foreach \from/\to in {v1/v2,v2/v3,v3/v4,v4/v1,v1/v3} \draw
|
alpar@3
|
715 |
(\from) -- (\to);
|
alpar@2
|
716 |
% \draw[dashed] (\from) -- (\to);
|
alpar@2
|
717 |
\end{tikzpicture}
|
alpar@4
|
718 |
\caption{Graphs for the proof of Claim~\ref{claimSUB}}
|
alpar@4
|
719 |
\label{fig:proofSUB}
|
alpar@2
|
720 |
\end{center}
|
alpar@2
|
721 |
\end{figure}
|
alpar@4
|
722 |
Let the two graphs of Figure~\ref{fig:proofSUB} be the input
|
alpar@3
|
723 |
graphs. Suppose the total ordering relation is $u_1 \prec u_2 \prec
|
alpar@3
|
724 |
u_3 \prec u_4$,$M(s)\!=\{(u_1,v_1)\}$, and VF2 tries to add
|
alpar@3
|
725 |
$(u_2,v_2)\in P(s)$.\newline $Cons_{SUB}((u_2,v_2),M(s))=true$, so
|
alpar@3
|
726 |
$M\cup \{(u_2,v_2)\}$ is consistent by $SUB$. The cutting function
|
alpar@3
|
727 |
$Cut_{SUB}((u_2,v_2),M(s))$ is false, so it does not let cut the
|
alpar@3
|
728 |
tree.\newline On the other hand $Cut_{SUB}'((u_2,v_2),M(s))$ is true,
|
alpar@3
|
729 |
since\\$0=|\Gamma_{large}(v_2)\cap
|
alpar@3
|
730 |
\tilde{T}_{large}(s)|<|\Gamma_{small}(u_2)\cap
|
alpar@3
|
731 |
\tilde{T}_{small}(s)|=1$ is true, but still the tree can not be
|
alpar@3
|
732 |
pruned, because otherwise the
|
alpar@3
|
733 |
$\{(u_1,v_1)(u_2,v_2)(u_3,v_3)(u_4,v_4)\}$ mapping can not be found.
|
alpar@2
|
734 |
\end{proof}
|
alpar@2
|
735 |
|
alpar@2
|
736 |
\section{The VF2++ Algorithm}
|
alpar@3
|
737 |
Although any total ordering relation makes the search space of VF2 a
|
alpar@3
|
738 |
tree, its choice turns out to dramatically influence the number of
|
alpar@3
|
739 |
visited states. The goal is to determine an efficient one as quickly
|
alpar@3
|
740 |
as possible.
|
alpar@2
|
741 |
|
alpar@3
|
742 |
The main reason for VF2++' superiority over VF2 is twofold. Firstly,
|
alpar@3
|
743 |
taking into account the structure and the node labeling of the graph,
|
alpar@3
|
744 |
VF2++ determines a state order in which most of the unfruitful
|
alpar@3
|
745 |
branches of the search space can be pruned immediately. Secondly,
|
alpar@3
|
746 |
introducing more efficient --- nevertheless still easier to compute
|
alpar@3
|
747 |
--- cutting rules reduces the chance of going astray even further.
|
alpar@2
|
748 |
|
alpar@3
|
749 |
In addition to the usual subgraph isomorphism, specialized versions
|
alpar@3
|
750 |
for induced subgraph isomorphism and for graph isomorphism have been
|
alpar@3
|
751 |
designed. VF2++ has gained a runtime improvement of one order of
|
alpar@3
|
752 |
magnitude respecting induced subgraph isomorphism and a better
|
alpar@3
|
753 |
asymptotical behaviour in the case of graph isomorphism problem.
|
alpar@2
|
754 |
|
alpar@3
|
755 |
Note that a weaker version of the cutting rules and the more efficient
|
alpar@3
|
756 |
candidate set calculating were described in \cite{VF2Plus}, too.
|
alpar@2
|
757 |
|
alpar@3
|
758 |
It should be noted that all the methods described in this section are
|
alpar@3
|
759 |
extendable to handle directed graphs and edge labels as well.
|
alpar@2
|
760 |
|
alpar@3
|
761 |
The basic ideas and the detailed description of VF2++ are provided in
|
alpar@3
|
762 |
the following.
|
alpar@2
|
763 |
|
alpar@2
|
764 |
\subsection{Preparations}
|
alpar@2
|
765 |
\begin{claim}
|
alpar@2
|
766 |
\label{claim:claimCoverFromLeft}
|
alpar@3
|
767 |
The total ordering relation uniquely determines a node order, in which
|
alpar@3
|
768 |
the nodes of $V_{small}$ will be covered by VF2. From the point of
|
alpar@3
|
769 |
view of the matching procedure, this means, that always the same node
|
alpar@3
|
770 |
of $G_{small}$ will be covered on the d-th level.
|
alpar@2
|
771 |
\end{claim}
|
alpar@2
|
772 |
\begin{proof}
|
alpar@3
|
773 |
In order to make the search space a tree, the pairs in $\{(u,v)\in
|
alpar@3
|
774 |
P(s) : \exists \hat{u} : \hat{u}\prec u\}$ are excluded from $P(s)$.
|
alpar@2
|
775 |
\newline
|
alpar@3
|
776 |
Let $\tilde{P}(s):=P(s)\backslash \{(u,v)\in P(s) : \exists \hat{u} :
|
alpar@3
|
777 |
\hat{u}\prec u\}$
|
alpar@2
|
778 |
\newline
|
alpar@3
|
779 |
The relation $\prec$ is a total ordering, so $\exists!\ \tilde{u} :
|
alpar@3
|
780 |
\forall\ (u,v)\in \tilde{P}(s): u=\tilde u$. Since a pair form
|
alpar@3
|
781 |
$\tilde{P}(s)$ is chosen for including into $M(s)$, it is obvious,
|
alpar@3
|
782 |
that only $\tilde{u}$ can be covered in $V_{small}$. Actually,
|
alpar@3
|
783 |
$\tilde{u}$ is the smallest element in $T_{small}(s)$ (or in
|
alpar@3
|
784 |
$V_{small}\backslash M_{small}(s)$, if $T_{small}(s)$ were empty), and
|
alpar@3
|
785 |
$T_{small}(s)$ depends only on the covered nodes of $G_{small}$.
|
alpar@2
|
786 |
\newline
|
alpar@3
|
787 |
Simple induction on $d$ shows that the set of covered nodes of
|
alpar@3
|
788 |
$G_{small}$ is unique, if $d$ is given, so $\tilde{u}$ is unique if
|
alpar@3
|
789 |
$d$ is given.
|
alpar@2
|
790 |
\end{proof}
|
alpar@2
|
791 |
|
alpar@2
|
792 |
\begin{definition}
|
alpar@3
|
793 |
An order $(u_{\sigma(1)},u_{\sigma(2)},..,u_{\sigma(|V_{small}|)})$ of
|
alpar@3
|
794 |
$V_{small}$ is \textbf{matching order}, if exists $\prec$ total
|
alpar@3
|
795 |
ordering relation, s.t. the VF2 with $\prec$ on the d-th level finds
|
alpar@3
|
796 |
pair for $u_{\sigma(d)}$ for all $d\in\{1,..,|V_{small}|\}$.
|
alpar@2
|
797 |
\end{definition}
|
alpar@2
|
798 |
|
alpar@2
|
799 |
\begin{claim}\label{claim:MOclaim}
|
alpar@3
|
800 |
A total ordering is matching order, iff the nodes of every component
|
alpar@3
|
801 |
form an interval in the node sequence, and every node connects to a
|
alpar@3
|
802 |
previous node in its component except the first node of the
|
alpar@3
|
803 |
component. The order of the components is arbitrary. \\Formally
|
alpar@3
|
804 |
spoken, an order
|
alpar@3
|
805 |
$(u_{\sigma(1)},u_{\sigma(2)},..,u_{\sigma(|V_{small}|)})$ of
|
alpar@3
|
806 |
$V_{small}$ is matching order $\Leftrightarrow$ $\forall
|
alpar@3
|
807 |
G'_{small}=(V'_{small},E'_{small})\ component\ of\ G_{small}: \forall
|
alpar@3
|
808 |
i: (\exists j : j<i\wedge u_{\sigma(j)},u_{\sigma(i)}\in
|
alpar@3
|
809 |
V'_{small})\Rightarrow \exists k : k < i \wedge (\forall l: k\leq
|
alpar@3
|
810 |
l\leq i \Rightarrow u_{l}\in V'_{small}) \wedge
|
alpar@3
|
811 |
(u_{\sigma{(k)}},u_{\sigma{(i)}})\in E'_{small}$, where $i,j,k,l\in
|
alpar@3
|
812 |
\{1,..,|V_{small}|\}$\newline
|
alpar@2
|
813 |
\end{claim}
|
alpar@2
|
814 |
\begin{proof}
|
alpar@3
|
815 |
Suppose a matching order is given. It has to be shown that the node
|
alpar@3
|
816 |
sequence has a structure described above.\\ Let
|
alpar@3
|
817 |
$G'_{small}=(V'_{small},E'_{small})$ be an arbitrary component and $i$
|
alpar@3
|
818 |
an arbitrary index.
|
alpar@2
|
819 |
\newline
|
alpar@3
|
820 |
$(\exists j : j<i\wedge u_{\sigma(j)},u_{\sigma(i)}\in
|
alpar@3
|
821 |
V'_{small})\Rightarrow u_{\sigma(i)}$ is not the first covered node in
|
alpar@3
|
822 |
$G_{small}\ \Rightarrow u_{\sigma(i)}$ is connected to a covered node
|
alpar@3
|
823 |
$u_{\sigma(k)}$ where $k<i$, since $u_{\sigma(i)}\in T_{small}(s)$ for
|
alpar@3
|
824 |
some $s$ and $T_{small}(s)\subseteq{V'_{small}}$ contains only nodes
|
alpar@3
|
825 |
connected to at least one covered node. It's easy to see, that
|
alpar@3
|
826 |
$\forall l: k\leq l\leq i \Rightarrow u_{l}\in V'_{small}$, since
|
alpar@3
|
827 |
$T_{small}(s)$ contains only nodes connected to some covered ones,
|
alpar@3
|
828 |
while it is not empty, but if it were empty, then $u_{\sigma(k)}$ and
|
alpar@3
|
829 |
$u_{\sigma(i)}$ were not in the same component.
|
alpar@2
|
830 |
|
alpar@3
|
831 |
Now, let us show that if a node sequence has the special structure
|
alpar@3
|
832 |
described above, it has to be matching
|
alpar@3
|
833 |
order.\\ $(u_{\sigma(1)},u_{\sigma(2)},..,u_{\sigma(|V_{small}|)})$ is
|
alpar@3
|
834 |
a total ordering, which determines the matching order
|
alpar@3
|
835 |
$(u_{\sigma(1)},u_{\sigma(2)},..,u_{\sigma(|V_{small}|)})$.
|
alpar@2
|
836 |
\end{proof}
|
alpar@2
|
837 |
|
alpar@3
|
838 |
To summing up, a total ordering always uniquely determines a matching
|
alpar@3
|
839 |
order, and every matching order can be determined by a total ordering,
|
alpar@3
|
840 |
however, more than one different total orderings may determine the
|
alpar@3
|
841 |
same matching order.
|
alpar@2
|
842 |
\subsection{Idea behind the algorithm}
|
alpar@3
|
843 |
The goal is to find a matching order in which the algorithm is able to
|
alpar@3
|
844 |
recognize inconsistency or prune the infeasible branches on the
|
alpar@3
|
845 |
highest levels and goes deep only if it is needed.
|
alpar@2
|
846 |
|
alpar@2
|
847 |
\begin{notation}
|
alpar@3
|
848 |
Let $\mathbf{Conn_{H}(u)}:=|\Gamma_{small}(u)\cap H\}|$, that is the
|
alpar@3
|
849 |
number of neighbours of u which are in H, where $u\in V_{small} $ and
|
alpar@3
|
850 |
$H\subseteq V_{small}$.
|
alpar@2
|
851 |
\end{notation}
|
alpar@2
|
852 |
|
alpar@3
|
853 |
The principal question is the following. Suppose a state $s$ is
|
alpar@3
|
854 |
given. For which node of $T_{small}(s)$ is the hardest to find a
|
alpar@3
|
855 |
consistent pair in $G_{large}$? The more covered neighbours a node in
|
alpar@3
|
856 |
$T_{small}(s)$ has --- i.e. the largest $Conn_{M_{small}(s)}$ it has
|
alpar@3
|
857 |
---, the more rarely satisfiable consistency constraints for its pair
|
alpar@3
|
858 |
are given.
|
alpar@2
|
859 |
|
alpar@3
|
860 |
In biology, most of the graphs are sparse, thus several nodes in
|
alpar@3
|
861 |
$T_{small}(s)$ may have the same $Conn_{M_{small}(s)}$, which makes
|
alpar@3
|
862 |
reasonable to define a secondary and a tertiary order between them.
|
alpar@3
|
863 |
The observation above proves itself to be as determining, that the
|
alpar@3
|
864 |
secondary ordering prefers nodes with the most uncovered neighbours
|
alpar@3
|
865 |
among which have the same $Conn_{M_{small}(s)}$ to increase
|
alpar@3
|
866 |
$Conn_{M_{small}(s)}$ of uncovered nodes so much, as possible. The
|
alpar@3
|
867 |
tertiary ordering prefers nodes having the rarest uncovered labels.
|
alpar@2
|
868 |
|
alpar@3
|
869 |
Note that the secondary ordering is the same as the ordering by $deg$,
|
alpar@3
|
870 |
which is a static data in front of the above used.
|
alpar@2
|
871 |
|
alpar@3
|
872 |
These rules can easily result in a matching order which contains the
|
alpar@3
|
873 |
nodes of a long path successively, whose nodes may have low $Conn$ and
|
alpar@3
|
874 |
is easily matchable into $G_{large}$. To avoid that, a BFS order is
|
alpar@3
|
875 |
used, which provides the shortest possible paths.
|
alpar@2
|
876 |
\newline
|
alpar@2
|
877 |
|
alpar@3
|
878 |
In the following, some examples on which the VF2 may be slow are
|
alpar@3
|
879 |
described, although they are easily solvable by using a proper
|
alpar@3
|
880 |
matching order.
|
alpar@2
|
881 |
|
alpar@2
|
882 |
\begin{example}
|
alpar@3
|
883 |
Suppose $G_{small}$ can be mapped into $G_{large}$ in many ways
|
alpar@3
|
884 |
without node labels. Let $u\in V_{small}$ and $v\in V_{large}$.
|
alpar@2
|
885 |
\newline
|
alpar@2
|
886 |
$lab(u):=black$
|
alpar@2
|
887 |
\newline
|
alpar@2
|
888 |
$lab(v):=black$
|
alpar@2
|
889 |
\newline
|
alpar@3
|
890 |
$lab(\tilde{u}):=red \ \forall \tilde{u}\in (V_{small}\backslash
|
alpar@3
|
891 |
\{u\})$
|
alpar@2
|
892 |
\newline
|
alpar@3
|
893 |
$lab(\tilde{v}):=red \ \forall \tilde{v}\in (V_{large}\backslash
|
alpar@3
|
894 |
\{v\})$
|
alpar@2
|
895 |
\newline
|
alpar@2
|
896 |
|
alpar@3
|
897 |
Now, any mapping by the node label $lab$ must contain $(u,v)$, since
|
alpar@3
|
898 |
$u$ is black and no node in $V_{large}$ has a black label except
|
alpar@3
|
899 |
$v$. If unfortunately $u$ were the last node which will get covered,
|
alpar@3
|
900 |
VF2 would check only in the last steps, whether $u$ can be matched to
|
alpar@3
|
901 |
$v$.
|
alpar@2
|
902 |
\newline
|
alpar@3
|
903 |
However, had $u$ been the first matched node, u would have been
|
alpar@3
|
904 |
matched immediately to v, so all the mappings would have been
|
alpar@3
|
905 |
precluded in which node labels can not correspond.
|
alpar@2
|
906 |
\end{example}
|
alpar@2
|
907 |
|
alpar@2
|
908 |
\begin{example}
|
alpar@3
|
909 |
Suppose there is no node label given, $G_{small}$ is a small graph and
|
alpar@3
|
910 |
can not be mapped into $G_{large}$ and $u\in V_{small}$.
|
alpar@2
|
911 |
\newline
|
alpar@3
|
912 |
Let $G'_{small}:=(V_{small}\cup
|
alpar@3
|
913 |
\{u'_{1},u'_{2},..,u'_{k}\},E_{small}\cup
|
alpar@3
|
914 |
\{(u,u'_{1}),(u'_{1},u'_{2}),..,(u'_{k-1},u'_{k})\})$, that is,
|
alpar@3
|
915 |
$G'_{small}$ is $G_{small}\cup \{ a\ k$ long path, which is disjoint
|
alpar@3
|
916 |
from $G_{small}$ and one of its starting points is connected to $u\in
|
alpar@3
|
917 |
V_{small}\}$.
|
alpar@2
|
918 |
\newline
|
alpar@3
|
919 |
Is there a subgraph of $G_{large}$, which is isomorph with
|
alpar@3
|
920 |
$G'_{small}$?
|
alpar@2
|
921 |
\newline
|
alpar@3
|
922 |
If unfortunately the nodes of the path were the first $k$ nodes in the
|
alpar@3
|
923 |
matching order, the algorithm would iterate through all the possible k
|
alpar@3
|
924 |
long paths in $G_{large}$, and it would recognize that no path can be
|
alpar@3
|
925 |
extended to $G'_{small}$.
|
alpar@2
|
926 |
\newline
|
alpar@3
|
927 |
However, had it started by the matching of $G_{small}$, it would not
|
alpar@3
|
928 |
have matched any nodes of the path.
|
alpar@2
|
929 |
\end{example}
|
alpar@2
|
930 |
|
alpar@3
|
931 |
These examples may look artificial, but the same problems also appear
|
Madarasi@7
|
932 |
in real-world instances, even though in a less obvious way.
|
alpar@2
|
933 |
|
alpar@2
|
934 |
\subsection{Total ordering}
|
alpar@3
|
935 |
Instead of the total ordering relation, the matching order will be
|
alpar@3
|
936 |
searched directly.
|
alpar@2
|
937 |
\begin{notation}
|
alpar@3
|
938 |
Let \textbf{F$_\mathcal{M}$(l)}$:=|\{v\in V_{large} :
|
alpar@3
|
939 |
l=lab(v)\}|-|\{u\in V_{small}\backslash \mathcal{M} : l=lab(u)\}|$ ,
|
alpar@3
|
940 |
where $l$ is a label and $\mathcal{M}\subseteq V_{small}$.
|
alpar@2
|
941 |
\end{notation}
|
alpar@2
|
942 |
|
alpar@2
|
943 |
\begin{definition}Let $\mathbf{arg\ max}_{f}(S) :=\{u : u\in S \wedge f(u)=max_{v\in S}\{f(v)\}\}$ and $\mathbf{arg\ min}_{f}(S) := arg\ max_{-f}(S)$, where $S$ is a finite set and $f:S\longrightarrow \mathbb{R}$.
|
alpar@2
|
944 |
\end{definition}
|
alpar@2
|
945 |
|
alpar@2
|
946 |
\begin{algorithm}
|
Madarasi@8
|
947 |
\algtext*{EndIf}
|
Madarasi@8
|
948 |
\algtext*{EndProcedure}
|
alpar@2
|
949 |
\algtext*{EndWhile}
|
alpar@2
|
950 |
\caption{\hspace{0.5cm}$The\ method\ of\ VF2++\ for\ determining\ the\ node\ order$}\label{alg:VF2PPPseu}
|
alpar@2
|
951 |
\begin{algorithmic}[1]
|
alpar@3
|
952 |
\Procedure{VF2++order}{} \State $\mathcal{M}$ := $\emptyset$
|
alpar@3
|
953 |
\Comment{matching order} \While{$V_{small}\backslash \mathcal{M}
|
alpar@3
|
954 |
\neq\emptyset$} \State $r\in$ arg max$_{deg}$ (arg
|
alpar@3
|
955 |
min$_{F_\mathcal{M}\circ lab}(V_{small}\backslash
|
alpar@3
|
956 |
\mathcal{M})$)\label{alg:findMin} \State Compute $T$, a BFS tree with
|
alpar@3
|
957 |
root node $r$. \For{$d=0,1,...,depth(T)$} \State $V_d$:=nodes of the
|
alpar@3
|
958 |
$d$-th level \State Process $V_d$ \Comment{See Algorithm
|
Madarasi@8
|
959 |
\ref{alg:VF2PPProcess1}} \EndFor
|
alpar@3
|
960 |
\EndWhile \EndProcedure
|
alpar@2
|
961 |
\end{algorithmic}
|
alpar@2
|
962 |
\end{algorithm}
|
alpar@2
|
963 |
|
alpar@2
|
964 |
\begin{algorithm}
|
Madarasi@8
|
965 |
\algtext*{EndIf}
|
Madarasi@8
|
966 |
\algtext*{EndProcedure}%ne nyomtasson ..
|
alpar@2
|
967 |
\algtext*{EndWhile}
|
Madarasi@8
|
968 |
\caption{\hspace{.5cm}$The\ method\ for\ processing\ a\ level\ of\ the\ BFS\ tree$}\label{alg:VF2PPProcess1}
|
alpar@2
|
969 |
\begin{algorithmic}[1]
|
alpar@3
|
970 |
\Procedure{VF2++ProcessLevel1}{$V_{d}$} \While{$V_d\neq\emptyset$}
|
alpar@3
|
971 |
\State $m\in$ arg min$_{F_\mathcal{M}\circ\ lab}($ arg max$_{deg}($arg
|
alpar@3
|
972 |
max$_{Conn_{\mathcal{M}}}(V_{d})))$ \State $V_d:=V_d\backslash m$
|
alpar@3
|
973 |
\State Append node $m$ to the end of $\mathcal{M}$ \State Refresh
|
alpar@3
|
974 |
$F_\mathcal{M}$ \EndWhile \EndProcedure
|
alpar@2
|
975 |
\end{algorithmic}
|
alpar@2
|
976 |
\end{algorithm}
|
alpar@2
|
977 |
|
alpar@4
|
978 |
Algorithm~\ref{alg:VF2PPPseu} is a high level description of the
|
alpar@4
|
979 |
matching order procedure of VF2++. It computes a BFS tree for each
|
alpar@3
|
980 |
component in ascending order of their rarest $lab$ and largest $deg$,
|
alpar@4
|
981 |
whose root vertex is the component's minimal
|
Madarasi@8
|
982 |
node. Algorithm~\ref{alg:VF2PPProcess1} is a method to process a level of the BFS tree, which appends the nodes of the current level in descending
|
Madarasi@8
|
983 |
lexicographic order by $(Conn_{\mathcal{M}},deg,-F_\mathcal{M})$ separately
|
Madarasi@8
|
984 |
to $\mathcal{M}$, and refreshes $F_\mathcal{M}$ immediately.
|
alpar@2
|
985 |
|
alpar@4
|
986 |
Claim~\ref{claim:MOclaim} shows that Algorithm~\ref{alg:VF2PPPseu}
|
alpar@4
|
987 |
provides a matching order.
|
alpar@2
|
988 |
|
alpar@2
|
989 |
|
alpar@2
|
990 |
\subsection{Cutting rules}
|
alpar@2
|
991 |
\label{VF2PPCuttingRules}
|
alpar@3
|
992 |
This section presents the cutting rules of VF2++, which are improved
|
alpar@3
|
993 |
by using extra information coming from the node labels.
|
alpar@2
|
994 |
\begin{notation}
|
alpar@3
|
995 |
Let $\mathbf{\Gamma_{small}^{l}(u)}:=\{\tilde{u} : lab(\tilde{u})=l
|
alpar@3
|
996 |
\wedge \tilde{u}\in \Gamma_{small} (u)\}$ and
|
alpar@3
|
997 |
$\mathbf{\Gamma_{large}^{l}(v)}:=\{\tilde{v} : lab(\tilde{v})=l \wedge
|
alpar@3
|
998 |
\tilde{v}\in \Gamma_{large} (v)\}$, where $u\in V_{small}$, $v\in
|
alpar@3
|
999 |
V_{large}$ and $l$ is a label.
|
alpar@2
|
1000 |
\end{notation}
|
alpar@2
|
1001 |
|
alpar@2
|
1002 |
\subsubsection{Induced subgraph isomorphism}
|
alpar@2
|
1003 |
\begin{claim}
|
alpar@2
|
1004 |
\[LabCut_{IND}((u,v),M(s))\!:=\!\!\!\!\!\bigvee_{l\ is\ label}\!\!\!\!\!\!\!|\Gamma_{large}^{l} (v) \cap T_{large}(s)|\!<\!|\Gamma_{small}^{l}(u)\cap T_{small}(s)|\ \vee\]\[\bigvee_{l\ is\ label} \newline |\Gamma_{large}^{l}(v)\cap \tilde{T}_{large}(s)| < |\Gamma_{small}^{l}(u)\cap \tilde{T}_{small}(s)|\] is a cutting function by IND.
|
alpar@2
|
1005 |
\end{claim}
|
alpar@2
|
1006 |
\begin{proof}
|
alpar@3
|
1007 |
It has to be shown, that $LabCut_{IND}((u,v),M(s))=true\Rightarrow$
|
alpar@3
|
1008 |
the mapping can not be extended to a whole
|
alpar@3
|
1009 |
mapping.\\ $LabCut_{IND}((u,v),M(s))=true,$ iff the following
|
alpar@3
|
1010 |
holds. $\\\exists l: |\Gamma_{large}^{l} (v)\ \cap\ T_{large}(s)| <
|
alpar@3
|
1011 |
|\Gamma_{small}^{l} (u)\ \cap\ T_{small}(s)| \vee
|
alpar@3
|
1012 |
|\Gamma_{large}^{l}(v)\cap \tilde{T}_{large}(s)| <
|
alpar@3
|
1013 |
|\Gamma_{small}^{l}(u)\cap \tilde{T}_{small}(s)|$.
|
alpar@2
|
1014 |
|
alpar@3
|
1015 |
Suppose that $|\Gamma_{large}^{l} (v)\ \cap\ T_{large}(s)| <
|
alpar@3
|
1016 |
|\Gamma_{small}^{l} (u)\ \cap\ T_{small}(s)|$. Each node of
|
alpar@3
|
1017 |
$\Gamma_{small}^{l} (u)\ \cap\ T_{small}(s)$ has to be matched to a
|
alpar@3
|
1018 |
node in $\Gamma_{large}^{l} (v)\ \cap\ T_{large}(s)$, so
|
alpar@3
|
1019 |
$\Gamma_{large}^{l} (v)\ \cap\ T_{large}(s)$ can not be smaller than
|
alpar@3
|
1020 |
$\Gamma_{small}^{l} (u)\ \cap\ T_{small}(s)$. That is why $M(s)$ can
|
alpar@3
|
1021 |
not be extended to a whole mapping.
|
alpar@2
|
1022 |
|
alpar@3
|
1023 |
Otherwise $|\Gamma_{large}^{l}(v)\cap \tilde{T}_{large}(s)| <
|
alpar@3
|
1024 |
|\Gamma_{small}^{l}(u)\cap \tilde{T}_{small}(s)|$ has to be
|
alpar@3
|
1025 |
true. Similarly, each node of $\Gamma_{large}^{l}(v)\cap
|
alpar@3
|
1026 |
\tilde{T}_{large}(s)$ has to be matched to a node in
|
alpar@3
|
1027 |
$\Gamma_{small}^{l}(u)\cap \tilde{T}_{small}(s)$,
|
alpar@3
|
1028 |
i.e. $\Gamma_{large}^{l}(v)\cap \tilde{T}_{large}(s)$ can not be
|
alpar@3
|
1029 |
smaller than $\Gamma_{small}^{l}(u)\cap \tilde{T}_{small}(s)$, if
|
alpar@3
|
1030 |
$M(s)$ is extendible.
|
alpar@2
|
1031 |
\end{proof}
|
alpar@2
|
1032 |
The following claims can be proven similarly.
|
alpar@2
|
1033 |
\subsubsection{Graph isomorphism}
|
alpar@2
|
1034 |
\begin{claim}
|
alpar@2
|
1035 |
\[LabCut_{ISO}((u,v),M(s))\!:=\!\!\!\!\!\bigvee_{l\ is\ label}\!\!\!\!\!\!\!|\Gamma_{large}^{l} (v) \cap T_{large}(s)|\!\neq\!|\Gamma_{small}^{l}(u)\cap T_{small}(s)|\ \vee\]\[\bigvee_{l\ is\ label} \newline |\Gamma_{large}^{l}(v)\cap \tilde{T}_{large}(s)| \neq |\Gamma_{small}^{l}(u)\cap \tilde{T}_{small}(s)|\] is a cutting function by ISO.
|
alpar@2
|
1036 |
\end{claim}
|
alpar@2
|
1037 |
|
alpar@2
|
1038 |
\subsubsection{Subgraph isomorphism}
|
alpar@2
|
1039 |
\begin{claim}
|
Madarasi@7
|
1040 |
\[LabCut_{SUB}((u,v),M(s))\!:=\!\!\!\!\!\bigvee_{l\ is\ label}\!\!\!\!\!\!\!|\Gamma_{large}^{l} (v) \cap T_{large}(s)|\!<\!|\Gamma_{small}^{l}(u)\cap T_{small}(s)|\] is a cutting function by SUB.
|
alpar@2
|
1041 |
\end{claim}
|
alpar@2
|
1042 |
|
alpar@2
|
1043 |
|
alpar@2
|
1044 |
|
alpar@2
|
1045 |
\subsection{Implementation details}
|
alpar@3
|
1046 |
This section provides a detailed summary of an efficient
|
alpar@3
|
1047 |
implementation of VF2++.
|
alpar@2
|
1048 |
\subsubsection{Storing a mapping}
|
alpar@3
|
1049 |
After fixing an arbitrary node order ($u_0, u_1, ..,
|
alpar@3
|
1050 |
u_{|G_{small}|-1}$) of $G_{small}$, an array $M$ is usable to store
|
alpar@3
|
1051 |
the current mapping in the following way.
|
alpar@2
|
1052 |
\[
|
alpar@3
|
1053 |
M[i] =
|
alpar@2
|
1054 |
\begin{cases}
|
alpar@3
|
1055 |
v & if\ (u_i,v)\ is\ in\ the\ mapping\\ INVALID &
|
alpar@3
|
1056 |
if\ no\ node\ has\ been\ mapped\ to\ u_i.
|
alpar@2
|
1057 |
\end{cases}
|
alpar@2
|
1058 |
\]
|
alpar@3
|
1059 |
Where $i\in\{0,1, ..,|G_{small}|-1\}$, $v\in V_{large}$ and $INVALID$
|
alpar@3
|
1060 |
means "no node".
|
alpar@2
|
1061 |
\subsubsection{Avoiding the recurrence}
|
alpar@4
|
1062 |
The recursion of Algorithm~\ref{alg:VF2Pseu} can be realized
|
Madarasi@9
|
1063 |
as a \textit{while loop}, which has a loop counter $depth$ denoting the
|
Madarasi@9
|
1064 |
all-time depth of the recursion. Fixing a matching order, let $M$
|
Madarasi@9
|
1065 |
denote the array storing the all-time mapping. Based on Claim~\ref{claim:claimCoverFromLeft},
|
alpar@3
|
1066 |
$M$ is $INVALID$ from index $depth$+1 and not $INVALID$ before
|
Madarasi@9
|
1067 |
$depth$. $M[depth]$ changes
|
alpar@3
|
1068 |
while the state is being processed, but the property is held before
|
alpar@3
|
1069 |
both stepping back to a predecessor state and exploring a successor
|
alpar@3
|
1070 |
state.
|
alpar@2
|
1071 |
|
alpar@3
|
1072 |
The necessary part of the candidate set is easily maintainable or
|
alpar@3
|
1073 |
computable by following
|
alpar@4
|
1074 |
Section~\ref{candidateComputingVF2}. A much faster method
|
alpar@3
|
1075 |
has been designed for biological- and sparse graphs, see the next
|
alpar@3
|
1076 |
section for details.
|
alpar@2
|
1077 |
|
alpar@2
|
1078 |
\subsubsection{Calculating the candidates for a node}
|
alpar@4
|
1079 |
Being aware of Claim~\ref{claim:claimCoverFromLeft}, the
|
alpar@3
|
1080 |
task is not to maintain the candidate set, but to generate the
|
alpar@3
|
1081 |
candidate nodes in $G_{large}$ for a given node $u\in V_{small}$. In
|
alpar@3
|
1082 |
case of an expanding problem type and $M$ mapping, if a node $v\in
|
alpar@3
|
1083 |
V_{large}$ is a potential pair of $u\in V_{small}$, then $\forall
|
alpar@3
|
1084 |
u'\in V_{small} : (u,u')\in
|
alpar@3
|
1085 |
E_{small}\ and\ u'\ is\ covered\ by\ M\ \Rightarrow (v,Pair(M,u'))\in
|
alpar@3
|
1086 |
E_{large}$. That is, each covered neighbour of $u$ has to be mapped to
|
alpar@3
|
1087 |
a covered neighbour of $v$.
|
alpar@2
|
1088 |
|
alpar@3
|
1089 |
Having said that, an algorithm running in $\Theta(deg)$ time is
|
alpar@3
|
1090 |
describable if there exists a covered node in the component containing
|
Madarasi@9
|
1091 |
$u$, and a linear one other wise.
|
alpar@2
|
1092 |
|
alpar@2
|
1093 |
|
alpar@2
|
1094 |
\subsubsection{Determining the node order}
|
alpar@3
|
1095 |
This section describes how the node order preprocessing method of
|
alpar@3
|
1096 |
VF2++ can efficiently be implemented.
|
alpar@2
|
1097 |
|
alpar@3
|
1098 |
For using lookup tables, the node labels are associated with the
|
alpar@3
|
1099 |
numbers $\{0,1,..,|K|-1\}$, where $K$ is the set of the labels. It
|
Madarasi@9
|
1100 |
enables $F_\mathcal{M}$ to be stored in an array. At first, the node order
|
alpar@3
|
1101 |
$\mathcal{M}=\emptyset$, so $F_\mathcal{M}[i]$ is the number of nodes
|
alpar@3
|
1102 |
in $V_{small}$ having label i, which is easy to compute in
|
alpar@3
|
1103 |
$\Theta(|V_{small}|)$ steps.
|
alpar@2
|
1104 |
|
Madarasi@9
|
1105 |
Representing $\mathcal{M}\subseteq V_{small}$ as an array of
|
Madarasi@9
|
1106 |
size $|V_{small}|$, both the computation of the BFS tree, and processing its levels by Algorithm~\ref{alg:VF2PPProcess1} can be done inplace by swapping nodes.
|
alpar@2
|
1107 |
|
alpar@3
|
1108 |
After a node $u$ gets to the next place of the node order,
|
alpar@3
|
1109 |
$F_\mathcal{M}[lab[u]]$ has to be decreased by one, because there is
|
alpar@3
|
1110 |
one less covered node in $V_{large}$ with label $lab(u)$, that is why
|
alpar@3
|
1111 |
min selection sort is preferred which gives the elements from left to
|
alpar@4
|
1112 |
right in descending order, see Algorithm~\ref{alg:VF2PPProcess1}.
|
alpar@2
|
1113 |
|
alpar@2
|
1114 |
\subsubsection{Cutting rules}
|
alpar@4
|
1115 |
In Section~\ref{VF2PPCuttingRules}, the cutting rules were
|
alpar@3
|
1116 |
described using the sets $T_{small}$, $T_{large}$, $\tilde T_{small}$
|
alpar@3
|
1117 |
and $\tilde T_{large}$, which are dependent on the all-time mapping
|
alpar@3
|
1118 |
(i.e. on the all-time state). The aim is to check the labeled cutting
|
alpar@3
|
1119 |
rules of VF2++ in $\Theta(deg)$ time.
|
alpar@2
|
1120 |
|
alpar@3
|
1121 |
Firstly, suppose that these four sets are given in such a way, that
|
alpar@3
|
1122 |
checking whether a node is in a certain set takes constant time,
|
alpar@3
|
1123 |
e.g. they are given by their 0-1 characteristic vectors. Let $L$ be an
|
alpar@3
|
1124 |
initially zero integer lookup table of size $|K|$. After incrementing
|
alpar@3
|
1125 |
$L[lab(u')]$ for all $u'\in \Gamma_{small}(u) \cap T_{small}(s)$ and
|
alpar@3
|
1126 |
decrementing $L[lab(v')]$ for all $v'\in\Gamma_{large} (v) \cap
|
alpar@3
|
1127 |
T_{large}(s)$, the first part of the cutting rules is checkable in
|
alpar@3
|
1128 |
$\Theta(deg)$ time by considering the proper signs of $L$. Setting $L$
|
alpar@3
|
1129 |
to zero takes $\Theta(deg)$ time again, which makes it possible to use
|
Madarasi@9
|
1130 |
the same table through the whole algorithm. The second part of the
|
alpar@3
|
1131 |
cutting rules can be verified using the same method with $\tilde
|
alpar@3
|
1132 |
T_{small}$ and $\tilde T_{large}$ instead of $T_{small}$ and
|
alpar@3
|
1133 |
$T_{large}$. Thus, the overall complexity is $\Theta(deg)$.
|
alpar@2
|
1134 |
|
alpar@3
|
1135 |
An other integer lookup table storing the number of covered neighbours
|
alpar@3
|
1136 |
of each node in $G_{large}$ gives all the information about the sets
|
alpar@3
|
1137 |
$T_{large}$ and $\tilde T_{large}$, which is maintainable in
|
alpar@3
|
1138 |
$\Theta(deg)$ time when a pair is added or substracted by incrementing
|
alpar@3
|
1139 |
or decrementing the proper indices. A further improvement is that the
|
alpar@3
|
1140 |
values of $L[lab(u')]$ in case of checking $u$ is dependent only on
|
alpar@3
|
1141 |
$u$, i.e. on the size of the mapping, so for each $u\in V_{small}$ an
|
alpar@3
|
1142 |
array of pairs (label, number of such labels) can be stored to skip
|
alpar@3
|
1143 |
the maintaining operations. Note that these arrays are at most of size
|
alpar@3
|
1144 |
$deg$. Skipping this trick, the number of covered neighbours has to be
|
alpar@3
|
1145 |
stored for each node of $G_{small}$ as well to get the sets
|
alpar@3
|
1146 |
$T_{small}$ and $\tilde T_{small}$.
|
alpar@2
|
1147 |
|
alpar@3
|
1148 |
Using similar tricks, the consistency function can be evaluated in
|
alpar@3
|
1149 |
$\Theta(deg)$ steps, as well.
|
alpar@2
|
1150 |
|
alpar@2
|
1151 |
\section{The VF2 Plus Algorithm}
|
alpar@3
|
1152 |
The VF2 Plus algorithm is a recently improved version of VF2. It was
|
alpar@3
|
1153 |
compared with the state of the art algorithms in \cite{VF2Plus} and
|
alpar@3
|
1154 |
has proven itself to be competitive with RI, the best algorithm on
|
alpar@3
|
1155 |
biological graphs. \\ A short summary of VF2 Plus follows, which uses
|
alpar@3
|
1156 |
the notation and the conventions of the original paper.
|
alpar@2
|
1157 |
|
alpar@2
|
1158 |
\subsection{Ordering procedure}
|
alpar@3
|
1159 |
VF2 Plus uses a sorting procedure that prefers nodes in $V_{small}$
|
alpar@3
|
1160 |
with the lowest probability to find a pair in $V_{small}$ and the
|
alpar@3
|
1161 |
highest number of connections with the nodes already sorted by the
|
alpar@3
|
1162 |
algorithm.
|
alpar@2
|
1163 |
|
alpar@2
|
1164 |
\begin{definition}
|
alpar@3
|
1165 |
$(u,v)$ is a \textbf{feasible pair}, if $lab(u)=lab(v)$ and
|
alpar@3
|
1166 |
$deg(u)\leq deg(v)$, where $u\in{V_{small}}$ and $ v\in{V_{large}}$.
|
alpar@2
|
1167 |
\end{definition}
|
alpar@3
|
1168 |
$P_{lab}(L):=$ a priori probability to find a node with label $L$ in
|
alpar@3
|
1169 |
$V_{large}$
|
alpar@2
|
1170 |
\newline
|
alpar@3
|
1171 |
$P_{deg}(d):=$ a priori probability to find a node with degree $d$ in
|
alpar@3
|
1172 |
$V_{large}$
|
alpar@2
|
1173 |
\newline
|
alpar@3
|
1174 |
$P(u):=P_{lab}(L)*\bigcup_{d'>d}P_{deg}(d')$\\ $M$ is the set of
|
alpar@3
|
1175 |
already sorted nodes, $T$ is the set of nodes candidate to be
|
alpar@3
|
1176 |
selected, and $degreeM$ of a node is the number of its neighbours in
|
alpar@3
|
1177 |
$M$.
|
alpar@2
|
1178 |
\begin{algorithm}
|
alpar@3
|
1179 |
\algtext*{EndIf}%ne nyomtasson end if-et \algtext*{EndFor}%ne
|
alpar@3
|
1180 |
nyomtasson .. \algtext*{EndProcedure}%ne nyomtasson ..
|
alpar@2
|
1181 |
\algtext*{EndWhile}
|
alpar@2
|
1182 |
\caption{}\label{alg:VF2PlusPseu}
|
alpar@2
|
1183 |
\begin{algorithmic}[1]
|
alpar@3
|
1184 |
\Procedure{VF2 Plus order}{} \State Select the node with the lowest
|
alpar@3
|
1185 |
$P$. \If {more nodes share the same $P$} \State select the one with
|
alpar@3
|
1186 |
maximum degree \EndIf \If {more nodes share the same $P$ and have the
|
alpar@3
|
1187 |
max degree} \State select the first \EndIf \State Put the selected
|
alpar@3
|
1188 |
node in the set $M$. \label{alg:putIn} \State Put all its unsorted
|
alpar@3
|
1189 |
neighbours in the set $T$. \If {$M\neq V_{small}$} \State From set
|
alpar@3
|
1190 |
$T$ select the node with maximum $degreeM$. \If {more nodes have
|
alpar@3
|
1191 |
maximum $degreeM$} \State Select the one with the lowest $P$ \EndIf
|
alpar@3
|
1192 |
\If {more nodes have maximum $degreeM$ and $P$} \State Select the
|
alpar@3
|
1193 |
first. \EndIf \State \textbf{goto \ref{alg:putIn}.} \EndIf
|
alpar@2
|
1194 |
\EndProcedure
|
alpar@2
|
1195 |
\end{algorithmic}
|
alpar@2
|
1196 |
\end{algorithm}
|
alpar@2
|
1197 |
|
alpar@4
|
1198 |
Using these notations, Algorithm~\ref{alg:VF2PlusPseu}
|
alpar@3
|
1199 |
provides the description of the sorting procedure.
|
alpar@2
|
1200 |
|
alpar@3
|
1201 |
Note that $P(u)$ is not the exact probability of finding a consistent
|
alpar@3
|
1202 |
pair for $u$ by choosing a node of $V_{large}$ randomly, since
|
alpar@3
|
1203 |
$P_{lab}$ and $P_{deg}$ are not independent, though calculating the
|
alpar@3
|
1204 |
real probability would take quadratic time, which may be reduced by
|
alpar@3
|
1205 |
using fittingly lookup tables.
|
alpar@2
|
1206 |
|
alpar@2
|
1207 |
\section{Experimental results}
|
alpar@3
|
1208 |
This section compares the performance of VF2++ and VF2 Plus. Both
|
alpar@3
|
1209 |
algorithms have run faster with orders of magnitude than VF2, thus its
|
alpar@3
|
1210 |
inclusion was not reasonable.
|
alpar@2
|
1211 |
\subsection{Biological graphs}
|
alpar@3
|
1212 |
The tests have been executed on a recent biological dataset created
|
alpar@3
|
1213 |
for the International Contest on Pattern Search in Biological
|
Madarasi@7
|
1214 |
Databases\cite{Content}, which has been constructed of molecule,
|
Madarasi@7
|
1215 |
protein and contact map graphs extracted from the Protein Data
|
alpar@3
|
1216 |
Bank\cite{ProteinDataBank}.
|
alpar@2
|
1217 |
|
alpar@3
|
1218 |
The molecule dataset contains small graphs with less than 100 nodes
|
alpar@3
|
1219 |
and an average degree of less than 3. The protein dataset contains
|
alpar@3
|
1220 |
graphs having 500-10 000 nodes and an average degree of 4, while the
|
alpar@3
|
1221 |
contact map dataset contains graphs with 150-800 nodes and an average
|
alpar@3
|
1222 |
degree of 20. \\
|
alpar@2
|
1223 |
|
alpar@3
|
1224 |
In the following, the induced subgraph isomorphism and the graph
|
alpar@3
|
1225 |
isomorphism will be examined.
|
alpar@2
|
1226 |
|
Madarasi@7
|
1227 |
This dataset provides graph pairs, between which all the induced subgraph isomorphisms have to be found. For run time results, please see Figure~\ref{fig:bioIND}.
|
Madarasi@7
|
1228 |
|
Madarasi@7
|
1229 |
In an other experiment, the nodes of each graph in the database had been
|
Madarasi@7
|
1230 |
shuffled, and an isomorphism between the shuffled and the original
|
Madarasi@7
|
1231 |
graph was searched. The solution times are shown on Figure~\ref{fig:bioISO}.
|
Madarasi@7
|
1232 |
|
Madarasi@7
|
1233 |
|
alpar@2
|
1234 |
|
alpar@2
|
1235 |
\begin{figure}[H]
|
Madarasi@7
|
1236 |
\vspace*{-2cm}
|
Madarasi@7
|
1237 |
\hspace*{-1.5cm}
|
Madarasi@7
|
1238 |
\begin{subfigure}[b]{0.55\textwidth}
|
Madarasi@7
|
1239 |
\begin{figure}[H]
|
Madarasi@7
|
1240 |
\begin{tikzpicture}[trim axis left, trim axis right]
|
Madarasi@7
|
1241 |
\begin{axis}[title=Molecules ISO,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
|
Madarasi@7
|
1242 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
Madarasi@7
|
1243 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@7
|
1244 |
format/1000 sep = \thinspace}]
|
Madarasi@7
|
1245 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
Madarasi@7
|
1246 |
\addplot table {Orig/moleculesIso.txt}; \addplot[mark=triangle*,mark
|
Madarasi@7
|
1247 |
size=1.8pt,color=red] table {VF2PPLabel/moleculesIso.txt};
|
Madarasi@7
|
1248 |
\end{axis}
|
Madarasi@7
|
1249 |
\end{tikzpicture}
|
Madarasi@7
|
1250 |
\caption{In the case of molecules, there is not such a significant
|
Madarasi@7
|
1251 |
difference, but VF2++ seems to be faster as the number of nodes
|
Madarasi@7
|
1252 |
increases.}\label{fig:ISOMolecule}
|
Madarasi@7
|
1253 |
\end{figure}
|
Madarasi@7
|
1254 |
\end{subfigure}
|
Madarasi@7
|
1255 |
\hspace*{1.5cm}
|
Madarasi@7
|
1256 |
\begin{subfigure}[b]{0.55\textwidth}
|
Madarasi@7
|
1257 |
\begin{figure}[H]
|
Madarasi@7
|
1258 |
\begin{tikzpicture}[trim axis left, trim axis right]
|
Madarasi@7
|
1259 |
\begin{axis}[title=Contact maps ISO,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
|
Madarasi@7
|
1260 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
Madarasi@7
|
1261 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@7
|
1262 |
format/1000 sep = \thinspace}]
|
Madarasi@7
|
1263 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
Madarasi@7
|
1264 |
\addplot table {Orig/contactMapsIso.txt}; \addplot[mark=triangle*,mark
|
Madarasi@7
|
1265 |
size=1.8pt,color=red] table {VF2PPLabel/contactMapsIso.txt};
|
Madarasi@7
|
1266 |
\end{axis}
|
Madarasi@7
|
1267 |
\end{tikzpicture}
|
Madarasi@7
|
1268 |
\caption{The results are closer to each other on contact maps, but
|
Madarasi@7
|
1269 |
VF2++ still performs consistently better.}\label{fig:ISOContact}
|
Madarasi@7
|
1270 |
\end{figure}
|
Madarasi@7
|
1271 |
\end{subfigure}
|
Madarasi@7
|
1272 |
|
alpar@2
|
1273 |
\begin{center}
|
Madarasi@7
|
1274 |
\vspace*{-0.5cm}
|
Madarasi@7
|
1275 |
\begin{subfigure}[b]{0.55\textwidth}
|
Madarasi@7
|
1276 |
\begin{figure}[H]
|
Madarasi@7
|
1277 |
\begin{tikzpicture}[trim axis left, trim axis right]
|
Madarasi@7
|
1278 |
\begin{axis}[title=Proteins ISO,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
|
Madarasi@7
|
1279 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
Madarasi@7
|
1280 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@7
|
1281 |
format/1000 sep = \thinspace}]
|
Madarasi@7
|
1282 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
Madarasi@7
|
1283 |
\addplot table {Orig/proteinsIso.txt}; \addplot[mark=triangle*,mark
|
Madarasi@7
|
1284 |
size=1.8pt,color=red] table {VF2PPLabel/proteinsIso.txt};
|
Madarasi@7
|
1285 |
\end{axis}
|
Madarasi@7
|
1286 |
\end{tikzpicture}
|
Madarasi@7
|
1287 |
\caption{On protein graphs, VF2 Plus has a super linear time
|
Madarasi@7
|
1288 |
complexity, while VF2++ runs in near constant time. The difference
|
Madarasi@7
|
1289 |
is about two order of magnitude on large graphs.}\label{fig:ISOProt}
|
Madarasi@7
|
1290 |
\end{figure}
|
Madarasi@7
|
1291 |
\end{subfigure}
|
Madarasi@7
|
1292 |
\end{center}
|
Madarasi@7
|
1293 |
\vspace*{-0.6cm}
|
Madarasi@7
|
1294 |
\caption{\normalsize{Graph isomomorphism on biological graphs}}\label{fig:bioISO}
|
Madarasi@7
|
1295 |
\end{figure}
|
Madarasi@7
|
1296 |
|
Madarasi@7
|
1297 |
|
Madarasi@7
|
1298 |
\begin{figure}[H]
|
Madarasi@7
|
1299 |
\vspace*{-2cm}
|
Madarasi@7
|
1300 |
\hspace*{-1.5cm}
|
Madarasi@7
|
1301 |
\begin{subfigure}[b]{0.55\textwidth}
|
Madarasi@7
|
1302 |
\begin{figure}[H]
|
Madarasi@7
|
1303 |
\begin{tikzpicture}[trim axis left, trim axis right]
|
Madarasi@7
|
1304 |
\begin{axis}[title=Molecules IND,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
|
Madarasi@7
|
1305 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
Madarasi@7
|
1306 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@7
|
1307 |
format/1000 sep = \thinspace}]
|
Madarasi@7
|
1308 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
Madarasi@7
|
1309 |
\addplot table {Orig/Molecules.32.txt}; \addplot[mark=triangle*,mark
|
Madarasi@7
|
1310 |
size=1.8pt,color=red] table {VF2PPLabel/Molecules.32.txt};
|
Madarasi@7
|
1311 |
\end{axis}
|
Madarasi@7
|
1312 |
\end{tikzpicture}
|
Madarasi@7
|
1313 |
\caption{In the case of molecules, the algorithms have
|
Madarasi@7
|
1314 |
similar behaviour, but VF2++ is almost two times faster even on such
|
Madarasi@7
|
1315 |
small graphs.} \label{fig:INDMolecule}
|
Madarasi@7
|
1316 |
\end{figure}
|
Madarasi@7
|
1317 |
\end{subfigure}
|
Madarasi@7
|
1318 |
\hspace*{1.5cm}
|
Madarasi@7
|
1319 |
\begin{subfigure}[b]{0.55\textwidth}
|
Madarasi@7
|
1320 |
\begin{figure}[H]
|
Madarasi@7
|
1321 |
\begin{tikzpicture}[trim axis left, trim axis right]
|
Madarasi@7
|
1322 |
\begin{axis}[title=Contact maps IND,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
|
Madarasi@7
|
1323 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
Madarasi@7
|
1324 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@7
|
1325 |
format/1000 sep = \thinspace}]
|
Madarasi@7
|
1326 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
Madarasi@7
|
1327 |
\addplot table {Orig/ContactMaps.128.txt};
|
Madarasi@7
|
1328 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
Madarasi@7
|
1329 |
{VF2PPLabel/ContactMaps.128.txt};
|
Madarasi@7
|
1330 |
\end{axis}
|
Madarasi@7
|
1331 |
\end{tikzpicture}
|
Madarasi@7
|
1332 |
\caption{On contact maps, VF2++ runs in near constant time, while VF2
|
Madarasi@7
|
1333 |
Plus has a near linear behaviour.} \label{fig:INDContact}
|
Madarasi@7
|
1334 |
\end{figure}
|
Madarasi@7
|
1335 |
\end{subfigure}
|
Madarasi@7
|
1336 |
|
Madarasi@7
|
1337 |
\begin{center}
|
Madarasi@7
|
1338 |
\vspace*{-0.5cm}
|
Madarasi@7
|
1339 |
\begin{subfigure}[b]{0.55\textwidth}
|
Madarasi@7
|
1340 |
\begin{figure}[H]
|
Madarasi@7
|
1341 |
\begin{tikzpicture}[trim axis left, trim axis right]
|
alpar@2
|
1342 |
\begin{axis}[title=Proteins IND,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1343 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1344 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1345 |
format/1000 sep = \thinspace}] %\addplot+[only marks] table
|
alpar@3
|
1346 |
{proteinsOrig.txt}; \addplot[mark=*,mark size=1.2pt,color=blue]
|
alpar@3
|
1347 |
table {Orig/Proteins.256.txt}; \addplot[mark=triangle*,mark
|
alpar@3
|
1348 |
size=1.8pt,color=red] table {VF2PPLabel/Proteins.256.txt};
|
alpar@2
|
1349 |
\end{axis}
|
alpar@2
|
1350 |
\end{tikzpicture}
|
alpar@3
|
1351 |
\caption{Both the algorithms have linear behaviour on protein
|
alpar@3
|
1352 |
graphs. VF2++ is more than 10 times faster than VF2
|
alpar@3
|
1353 |
Plus.} \label{fig:INDProt}
|
alpar@2
|
1354 |
\end{figure}
|
Madarasi@7
|
1355 |
\end{subfigure}
|
alpar@2
|
1356 |
\end{center}
|
Madarasi@7
|
1357 |
\vspace*{-0.5cm}
|
Madarasi@7
|
1358 |
\caption{\normalsize{Graph isomomorphism on biological graphs}}\label{fig:bioIND}
|
alpar@2
|
1359 |
\end{figure}
|
alpar@2
|
1360 |
|
alpar@2
|
1361 |
|
alpar@2
|
1362 |
|
alpar@2
|
1363 |
|
alpar@2
|
1364 |
|
alpar@2
|
1365 |
\subsection{Random graphs}
|
alpar@3
|
1366 |
This section compares VF2++ with VF2 Plus on random graphs of a large
|
alpar@3
|
1367 |
size. The node labels are uniformly distributed. Let $\delta$ denote
|
alpar@3
|
1368 |
the average degree. For the parameters of problems solved in the
|
alpar@3
|
1369 |
experiments, please see the top of each chart.
|
alpar@2
|
1370 |
\subsubsection{Graph isomorphism}
|
alpar@3
|
1371 |
To evaluate the efficiency of the algorithms in the case of graph
|
alpar@3
|
1372 |
isomorphism, connected graphs of less than 20 000 nodes have been
|
alpar@3
|
1373 |
considered. Generating a random graph and shuffling its nodes, an
|
Madarasi@7
|
1374 |
isomorphism had to be found. Figure \ref{fig:randISO} shows the runtime results
|
alpar@4
|
1375 |
on graph sets of various density.
|
alpar@2
|
1376 |
|
Madarasi@7
|
1377 |
|
Madarasi@7
|
1378 |
|
Madarasi@7
|
1379 |
|
alpar@2
|
1380 |
\begin{figure}[H]
|
Madarasi@7
|
1381 |
\vspace*{-1.5cm}
|
Madarasi@7
|
1382 |
\hspace*{-1.5cm}
|
Madarasi@7
|
1383 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1384 |
\begin{center}
|
alpar@2
|
1385 |
\begin{tikzpicture}
|
Madarasi@7
|
1386 |
\begin{axis}[title={Random ISO, $\delta = 5$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1387 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1388 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@7
|
1389 |
format/1000 sep = \space}]
|
alpar@2
|
1390 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1391 |
\addplot table {randGraph/iso/vf2pIso5_1.txt};
|
alpar@3
|
1392 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1393 |
{randGraph/iso/vf2ppIso5_1.txt};
|
alpar@2
|
1394 |
\end{axis}
|
alpar@2
|
1395 |
\end{tikzpicture}
|
alpar@2
|
1396 |
\end{center}
|
Madarasi@7
|
1397 |
\end{subfigure}
|
Madarasi@7
|
1398 |
%\hspace{1cm}
|
Madarasi@7
|
1399 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1400 |
\begin{center}
|
alpar@2
|
1401 |
\begin{tikzpicture}
|
Madarasi@7
|
1402 |
\begin{axis}[title={Random ISO, $\delta = 10$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1403 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1404 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@7
|
1405 |
format/1000 sep = \space}]
|
alpar@2
|
1406 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1407 |
\addplot table {randGraph/iso/vf2pIso10_1.txt};
|
alpar@3
|
1408 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1409 |
{randGraph/iso/vf2ppIso10_1.txt};
|
alpar@2
|
1410 |
\end{axis}
|
alpar@2
|
1411 |
\end{tikzpicture}
|
alpar@2
|
1412 |
\end{center}
|
Madarasi@7
|
1413 |
\end{subfigure}
|
Madarasi@7
|
1414 |
%%\hspace{1cm}
|
Madarasi@7
|
1415 |
\hspace*{-1.5cm}
|
Madarasi@7
|
1416 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1417 |
\begin{center}
|
alpar@2
|
1418 |
\begin{tikzpicture}
|
Madarasi@7
|
1419 |
\begin{axis}[title={Random ISO, $\delta = 15$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1420 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1421 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@7
|
1422 |
format/1000 sep = \space}]
|
alpar@2
|
1423 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1424 |
\addplot table {randGraph/iso/vf2pIso15_1.txt};
|
alpar@3
|
1425 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1426 |
{randGraph/iso/vf2ppIso15_1.txt};
|
alpar@2
|
1427 |
\end{axis}
|
alpar@2
|
1428 |
\end{tikzpicture}
|
alpar@2
|
1429 |
\end{center}
|
Madarasi@7
|
1430 |
\end{subfigure}
|
Madarasi@7
|
1431 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1432 |
\begin{center}
|
alpar@2
|
1433 |
\begin{tikzpicture}
|
Madarasi@7
|
1434 |
\begin{axis}[title={Random ISO, $\delta = 35$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1435 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1436 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@7
|
1437 |
format/1000 sep = \space}]
|
alpar@2
|
1438 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1439 |
\addplot table {randGraph/iso/vf2pIso35_1.txt};
|
alpar@3
|
1440 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1441 |
{randGraph/iso/vf2ppIso35_1.txt};
|
alpar@2
|
1442 |
\end{axis}
|
alpar@2
|
1443 |
\end{tikzpicture}
|
alpar@2
|
1444 |
\end{center}
|
Madarasi@7
|
1445 |
\end{subfigure}
|
Madarasi@7
|
1446 |
\begin{subfigure}[b]{0.55\textwidth}
|
Madarasi@7
|
1447 |
\hspace*{-1.5cm}
|
alpar@2
|
1448 |
\begin{tikzpicture}
|
Madarasi@7
|
1449 |
\begin{axis}[title={Random ISO, $\delta = 45$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1450 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1451 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@7
|
1452 |
format/1000 sep = \space}]
|
alpar@2
|
1453 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1454 |
\addplot table {randGraph/iso/vf2pIso45_1.txt};
|
alpar@3
|
1455 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1456 |
{randGraph/iso/vf2ppIso45_1.txt};
|
alpar@2
|
1457 |
\end{axis}
|
alpar@2
|
1458 |
\end{tikzpicture}
|
Madarasi@7
|
1459 |
\end{subfigure}
|
Madarasi@7
|
1460 |
\hspace*{-1.5cm}
|
Madarasi@7
|
1461 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1462 |
\begin{tikzpicture}
|
Madarasi@7
|
1463 |
\begin{axis}[title={Random ISO, $\delta = 100$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1464 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1465 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1466 |
format/1000 sep = \thinspace}]
|
alpar@2
|
1467 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1468 |
\addplot table {randGraph/iso/vf2pIso100_1.txt};
|
alpar@3
|
1469 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1470 |
{randGraph/iso/vf2ppIso100_1.txt};
|
alpar@2
|
1471 |
\end{axis}
|
alpar@2
|
1472 |
\end{tikzpicture}
|
Madarasi@7
|
1473 |
\end{subfigure}
|
alpar@2
|
1474 |
\vspace*{-0.8cm}
|
Madarasi@7
|
1475 |
\caption{IND on graphs having an average degree of
|
Madarasi@7
|
1476 |
5.}\label{fig:randISO}
|
alpar@2
|
1477 |
\end{figure}
|
alpar@2
|
1478 |
|
alpar@2
|
1479 |
|
Madarasi@7
|
1480 |
|
Madarasi@7
|
1481 |
|
Madarasi@7
|
1482 |
|
Madarasi@7
|
1483 |
|
Madarasi@7
|
1484 |
|
Madarasi@7
|
1485 |
|
Madarasi@7
|
1486 |
|
Madarasi@7
|
1487 |
|
alpar@3
|
1488 |
Considering the graph isomorphism problem, VF2++ consistently
|
alpar@3
|
1489 |
outperforms its rival especially on sparse graphs. The reason for the
|
alpar@3
|
1490 |
slightly super linear behaviour of VF2++ on denser graphs is the
|
alpar@3
|
1491 |
larger number of nodes in the BFS tree constructed in
|
alpar@4
|
1492 |
Algorithm~\ref{alg:VF2PPPseu}.
|
alpar@2
|
1493 |
|
alpar@2
|
1494 |
\subsubsection{Induced subgraph isomorphism}
|
alpar@3
|
1495 |
This section provides a comparison of VF2++ and VF2 Plus in the case
|
alpar@3
|
1496 |
of induced subgraph isomorphism. In addition to the size of the large
|
alpar@3
|
1497 |
graph, that of the small graph dramatically influences the hardness of
|
alpar@3
|
1498 |
a given problem too, so the overall picture is provided by examining
|
alpar@3
|
1499 |
small graphs of various size.
|
alpar@2
|
1500 |
|
alpar@3
|
1501 |
For each chart, a number $0<\rho< 1$ has been fixed and the following
|
alpar@3
|
1502 |
has been executed 150 times. Generating a large graph $G_{large}$,
|
alpar@3
|
1503 |
choose 10 of its induced subgraphs having $\rho\ |V_{large}|$ nodes,
|
alpar@3
|
1504 |
and for all the 10 subgraphs find a mapping by using both the graph
|
alpar@3
|
1505 |
matching algorithms. The $\delta = 5, 10, 35$ and $\rho = 0.05, 0.1,
|
Madarasi@10
|
1506 |
0.3, 0.6, 0.8, 0.95$ cases have been examined, see
|
alpar@4
|
1507 |
Figure~\ref{fig:randIND5}, \ref{fig:randIND10} and
|
Madarasi@10
|
1508 |
\ref{fig:randIND35}.
|
alpar@2
|
1509 |
|
alpar@2
|
1510 |
|
alpar@2
|
1511 |
|
alpar@2
|
1512 |
|
alpar@2
|
1513 |
|
alpar@2
|
1514 |
\begin{figure}[H]
|
Madarasi@7
|
1515 |
\vspace*{-1.5cm}
|
Madarasi@7
|
1516 |
\hspace*{-1.5cm}
|
alpar@2
|
1517 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1518 |
\begin{center}
|
alpar@2
|
1519 |
\begin{tikzpicture}
|
alpar@2
|
1520 |
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1521 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1522 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1523 |
format/1000 sep = \space}]
|
alpar@2
|
1524 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1525 |
\addplot table {randGraph/ind/vf2pInd5_0.05.txt};
|
alpar@3
|
1526 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1527 |
{randGraph/ind/vf2ppInd5_0.05.txt};
|
alpar@2
|
1528 |
\end{axis}
|
alpar@2
|
1529 |
\end{tikzpicture}
|
alpar@2
|
1530 |
\end{center}
|
alpar@2
|
1531 |
\end{subfigure}
|
alpar@2
|
1532 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1533 |
\begin{center}
|
alpar@2
|
1534 |
\begin{tikzpicture}
|
alpar@2
|
1535 |
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1536 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1537 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1538 |
format/1000 sep = \space}]
|
alpar@2
|
1539 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1540 |
\addplot table {randGraph/ind/vf2pInd5_0.1.txt};
|
alpar@3
|
1541 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1542 |
{randGraph/ind/vf2ppInd5_0.1.txt};
|
alpar@2
|
1543 |
\end{axis}
|
alpar@2
|
1544 |
\end{tikzpicture}
|
alpar@2
|
1545 |
\end{center}
|
alpar@2
|
1546 |
\end{subfigure}
|
Madarasi@7
|
1547 |
\hspace*{-1.5cm}
|
alpar@2
|
1548 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1549 |
\begin{center}
|
alpar@2
|
1550 |
\begin{tikzpicture}
|
alpar@2
|
1551 |
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1552 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1553 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1554 |
format/1000 sep = \space}]
|
alpar@2
|
1555 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1556 |
\addplot table {randGraph/ind/vf2pInd5_0.3.txt};
|
alpar@3
|
1557 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1558 |
{randGraph/ind/vf2ppInd5_0.3.txt};
|
alpar@2
|
1559 |
\end{axis}
|
alpar@2
|
1560 |
\end{tikzpicture}
|
alpar@2
|
1561 |
\end{center}
|
alpar@2
|
1562 |
\end{subfigure}
|
alpar@2
|
1563 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1564 |
\begin{center}
|
alpar@2
|
1565 |
\begin{tikzpicture}
|
alpar@2
|
1566 |
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.6$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1567 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1568 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1569 |
format/1000 sep = \space}]
|
alpar@2
|
1570 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1571 |
\addplot table {randGraph/ind/vf2pInd5_0.6.txt};
|
alpar@3
|
1572 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1573 |
{randGraph/ind/vf2ppInd5_0.6.txt};
|
alpar@2
|
1574 |
\end{axis}
|
alpar@2
|
1575 |
\end{tikzpicture}
|
alpar@2
|
1576 |
\end{center}
|
alpar@2
|
1577 |
\end{subfigure}
|
alpar@2
|
1578 |
\begin{subfigure}[b]{0.55\textwidth}
|
Madarasi@7
|
1579 |
\hspace*{-1.5cm}
|
alpar@2
|
1580 |
\begin{tikzpicture}
|
alpar@2
|
1581 |
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1582 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1583 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1584 |
format/1000 sep = \space}]
|
alpar@2
|
1585 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1586 |
\addplot table {randGraph/ind/vf2pInd5_0.8.txt};
|
alpar@3
|
1587 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1588 |
{randGraph/ind/vf2ppInd5_0.8.txt};
|
alpar@2
|
1589 |
\end{axis}
|
alpar@2
|
1590 |
\end{tikzpicture}
|
alpar@2
|
1591 |
\end{subfigure}
|
Madarasi@7
|
1592 |
\hspace*{-1.5cm}
|
alpar@2
|
1593 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1594 |
\begin{tikzpicture}
|
alpar@2
|
1595 |
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.95$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1596 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1597 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1598 |
format/1000 sep = \thinspace}]
|
alpar@2
|
1599 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1600 |
\addplot table {randGraph/ind/vf2pInd5_0.95.txt};
|
alpar@3
|
1601 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1602 |
{randGraph/ind/vf2ppInd5_0.95.txt};
|
alpar@2
|
1603 |
\end{axis}
|
alpar@2
|
1604 |
\end{tikzpicture}
|
alpar@2
|
1605 |
\end{subfigure}
|
alpar@2
|
1606 |
\vspace*{-0.8cm}
|
alpar@3
|
1607 |
\caption{IND on graphs having an average degree of
|
alpar@3
|
1608 |
5.}\label{fig:randIND5}
|
alpar@2
|
1609 |
\end{figure}
|
alpar@2
|
1610 |
|
alpar@2
|
1611 |
|
alpar@2
|
1612 |
\begin{figure}[H]
|
Madarasi@7
|
1613 |
\vspace*{-1.5cm}
|
Madarasi@7
|
1614 |
\hspace*{-1.5cm}
|
alpar@2
|
1615 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1616 |
\begin{center}
|
Madarasi@7
|
1617 |
\hspace*{-0.5cm}
|
alpar@2
|
1618 |
\begin{tikzpicture}
|
alpar@2
|
1619 |
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1620 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1621 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1622 |
format/1000 sep = \space}]
|
alpar@2
|
1623 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1624 |
\addplot table {randGraph/ind/vf2pInd10_0.05.txt};
|
alpar@3
|
1625 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1626 |
{randGraph/ind/vf2ppInd10_0.05.txt};
|
alpar@2
|
1627 |
\end{axis}
|
alpar@2
|
1628 |
\end{tikzpicture}
|
alpar@2
|
1629 |
\end{center}
|
alpar@2
|
1630 |
\end{subfigure}
|
alpar@2
|
1631 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1632 |
\begin{center}
|
Madarasi@7
|
1633 |
\hspace*{-0.5cm}
|
alpar@2
|
1634 |
\begin{tikzpicture}
|
alpar@2
|
1635 |
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1636 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1637 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1638 |
format/1000 sep = \space}]
|
alpar@2
|
1639 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1640 |
\addplot table {randGraph/ind/vf2pInd10_0.1.txt};
|
alpar@3
|
1641 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1642 |
{randGraph/ind/vf2ppInd10_0.1.txt};
|
alpar@2
|
1643 |
\end{axis}
|
alpar@2
|
1644 |
\end{tikzpicture}
|
alpar@2
|
1645 |
\end{center}
|
alpar@2
|
1646 |
\end{subfigure}
|
Madarasi@7
|
1647 |
\hspace*{-1.5cm}
|
alpar@2
|
1648 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1649 |
\begin{center}
|
alpar@2
|
1650 |
\begin{tikzpicture}
|
alpar@2
|
1651 |
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1652 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1653 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1654 |
format/1000 sep = \space}]
|
alpar@2
|
1655 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1656 |
\addplot table {randGraph/ind/vf2pInd10_0.3.txt};
|
alpar@3
|
1657 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1658 |
{randGraph/ind/vf2ppInd10_0.3.txt};
|
alpar@2
|
1659 |
\end{axis}
|
alpar@2
|
1660 |
\end{tikzpicture}
|
alpar@2
|
1661 |
\end{center}
|
alpar@2
|
1662 |
\end{subfigure}
|
alpar@2
|
1663 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1664 |
\begin{center}
|
alpar@2
|
1665 |
\begin{tikzpicture}
|
alpar@2
|
1666 |
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.6$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1667 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1668 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1669 |
format/1000 sep = \space}]
|
alpar@2
|
1670 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1671 |
\addplot table {randGraph/ind/vf2pInd10_0.6.txt};
|
alpar@3
|
1672 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1673 |
{randGraph/ind/vf2ppInd10_0.6.txt};
|
alpar@2
|
1674 |
\end{axis}
|
alpar@2
|
1675 |
\end{tikzpicture}
|
alpar@2
|
1676 |
\end{center}
|
alpar@2
|
1677 |
\end{subfigure}
|
Madarasi@7
|
1678 |
\hspace*{-1.5cm}
|
alpar@2
|
1679 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1680 |
\begin{tikzpicture}
|
alpar@2
|
1681 |
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1682 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1683 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1684 |
format/1000 sep = \space}]
|
alpar@2
|
1685 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1686 |
\addplot table {randGraph/ind/vf2pInd10_0.8.txt};
|
alpar@3
|
1687 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1688 |
{randGraph/ind/vf2ppInd10_0.8.txt};
|
alpar@2
|
1689 |
\end{axis}
|
alpar@2
|
1690 |
\end{tikzpicture}
|
alpar@2
|
1691 |
\end{subfigure}
|
alpar@2
|
1692 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1693 |
\begin{tikzpicture}
|
alpar@2
|
1694 |
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.95$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1695 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1696 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1697 |
format/1000 sep = \thinspace}]
|
alpar@2
|
1698 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1699 |
\addplot table {randGraph/ind/vf2pInd10_0.95.txt};
|
alpar@3
|
1700 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1701 |
{randGraph/ind/vf2ppInd10_0.95.txt};
|
alpar@2
|
1702 |
\end{axis}
|
alpar@2
|
1703 |
\end{tikzpicture}
|
alpar@2
|
1704 |
\end{subfigure}
|
alpar@2
|
1705 |
\vspace*{-0.8cm}
|
alpar@3
|
1706 |
\caption{IND on graphs having an average degree of
|
alpar@3
|
1707 |
10.}\label{fig:randIND10}
|
alpar@2
|
1708 |
\end{figure}
|
alpar@2
|
1709 |
|
alpar@2
|
1710 |
|
alpar@2
|
1711 |
|
alpar@2
|
1712 |
\begin{figure}[H]
|
Madarasi@7
|
1713 |
\vspace*{-1.5cm}
|
Madarasi@7
|
1714 |
\hspace*{-1.5cm}
|
alpar@2
|
1715 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1716 |
\begin{center}
|
alpar@2
|
1717 |
\begin{tikzpicture}
|
alpar@2
|
1718 |
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1719 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1720 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1721 |
format/1000 sep = \space}]
|
alpar@2
|
1722 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1723 |
\addplot table {randGraph/ind/vf2pInd35_0.05.txt};
|
alpar@3
|
1724 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1725 |
{randGraph/ind/vf2ppInd35_0.05.txt};
|
alpar@2
|
1726 |
\end{axis}
|
alpar@2
|
1727 |
\end{tikzpicture}
|
alpar@2
|
1728 |
\end{center}
|
alpar@2
|
1729 |
\end{subfigure}
|
alpar@2
|
1730 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1731 |
\begin{center}
|
alpar@2
|
1732 |
\begin{tikzpicture}
|
alpar@2
|
1733 |
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1734 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1735 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1736 |
format/1000 sep = \space}]
|
alpar@2
|
1737 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1738 |
\addplot table {randGraph/ind/vf2pInd35_0.1.txt};
|
alpar@3
|
1739 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1740 |
{randGraph/ind/vf2ppInd35_0.1.txt};
|
alpar@2
|
1741 |
\end{axis}
|
alpar@2
|
1742 |
\end{tikzpicture}
|
alpar@2
|
1743 |
\end{center}
|
alpar@2
|
1744 |
\end{subfigure}
|
Madarasi@7
|
1745 |
\hspace*{-1.5cm}
|
alpar@2
|
1746 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1747 |
\begin{center}
|
alpar@2
|
1748 |
\begin{tikzpicture}
|
alpar@2
|
1749 |
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1750 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1751 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1752 |
format/1000 sep = \space}]
|
alpar@2
|
1753 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1754 |
\addplot table {randGraph/ind/vf2pInd35_0.3.txt};
|
alpar@3
|
1755 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1756 |
{randGraph/ind/vf2ppInd35_0.3.txt};
|
alpar@2
|
1757 |
\end{axis}
|
alpar@2
|
1758 |
\end{tikzpicture}
|
alpar@2
|
1759 |
\end{center}
|
alpar@2
|
1760 |
\end{subfigure}
|
alpar@2
|
1761 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1762 |
\begin{center}
|
alpar@2
|
1763 |
\begin{tikzpicture}
|
alpar@2
|
1764 |
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.6$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1765 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1766 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1767 |
format/1000 sep = \space}]
|
alpar@2
|
1768 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1769 |
\addplot table {randGraph/ind/vf2pInd35_0.6.txt};
|
alpar@3
|
1770 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1771 |
{randGraph/ind/vf2ppInd35_0.6.txt};
|
alpar@2
|
1772 |
\end{axis}
|
alpar@2
|
1773 |
\end{tikzpicture}
|
alpar@2
|
1774 |
\end{center}
|
alpar@2
|
1775 |
\end{subfigure}
|
Madarasi@7
|
1776 |
\hspace*{-1.5cm}
|
alpar@2
|
1777 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1778 |
\begin{tikzpicture}
|
alpar@2
|
1779 |
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1780 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1781 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1782 |
format/1000 sep = \space}]
|
alpar@2
|
1783 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1784 |
\addplot table {randGraph/ind/vf2pInd35_0.8.txt};
|
alpar@3
|
1785 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1786 |
{randGraph/ind/vf2ppInd35_0.8.txt};
|
alpar@2
|
1787 |
\end{axis}
|
alpar@2
|
1788 |
\end{tikzpicture}
|
alpar@2
|
1789 |
\end{subfigure}
|
alpar@2
|
1790 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1791 |
\begin{tikzpicture}
|
alpar@2
|
1792 |
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.95$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1793 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1794 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
alpar@3
|
1795 |
format/1000 sep = \thinspace}]
|
alpar@2
|
1796 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1797 |
\addplot table {randGraph/ind/vf2pInd35_0.95.txt};
|
alpar@3
|
1798 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1799 |
{randGraph/ind/vf2ppInd35_0.95.txt};
|
alpar@2
|
1800 |
\end{axis}
|
alpar@2
|
1801 |
\end{tikzpicture}
|
alpar@2
|
1802 |
\end{subfigure}
|
alpar@2
|
1803 |
\vspace*{-0.8cm}
|
alpar@3
|
1804 |
\caption{IND on graphs having an average degree of
|
alpar@3
|
1805 |
35.}\label{fig:randIND35}
|
alpar@2
|
1806 |
\end{figure}
|
alpar@2
|
1807 |
|
alpar@2
|
1808 |
|
alpar@3
|
1809 |
Based on these experiments, VF2++ is faster than VF2 Plus and able to
|
alpar@3
|
1810 |
handle really large graphs in milliseconds. Note that when $IND$ was
|
alpar@3
|
1811 |
considered and the small graphs had proportionally few nodes ($\rho =
|
alpar@3
|
1812 |
0.05$, or $\rho = 0.1$), then VF2 Plus produced some inefficient node
|
alpar@4
|
1813 |
orders (e.g. see the $\delta=10$ case on
|
alpar@4
|
1814 |
Figure~\ref{fig:randIND10}). If these examples had been excluded, the
|
alpar@3
|
1815 |
charts would have seemed to be similar to the other ones.
|
alpar@3
|
1816 |
Unsurprisingly, as denser graphs are considered, both VF2++ and VF2
|
alpar@3
|
1817 |
Plus slow slightly down, but remain practically usable even on graphs
|
alpar@3
|
1818 |
having 10 000 nodes.
|
alpar@2
|
1819 |
|
alpar@2
|
1820 |
|
alpar@2
|
1821 |
|
alpar@2
|
1822 |
|
alpar@3
|
1823 |
|
alpar@2
|
1824 |
\section{Conclusion}
|
alpar@3
|
1825 |
In this paper, after providing a short summary of the recent
|
alpar@3
|
1826 |
algorithms, a new graph matching algorithm based on VF2, called VF2++,
|
alpar@3
|
1827 |
has been presented and analyzed from a practical viewpoint.
|
alpar@2
|
1828 |
|
alpar@3
|
1829 |
Recognizing the importance of the node order and determining an
|
alpar@3
|
1830 |
efficient one, VF2++ is able to match graphs of thousands of nodes in
|
alpar@3
|
1831 |
near practically linear time including preprocessing. In addition to
|
alpar@3
|
1832 |
the proper order, VF2++ uses more efficient consistency and cutting
|
alpar@3
|
1833 |
rules which are easy to compute and make the algorithm able to prune
|
alpar@3
|
1834 |
most of the unfruitful branches without going astray.
|
alpar@2
|
1835 |
|
alpar@3
|
1836 |
In order to show the efficiency of the new method, it has been
|
alpar@3
|
1837 |
compared to VF2 Plus, which is the best concurrent algorithm based on
|
alpar@3
|
1838 |
\cite{VF2Plus}.
|
alpar@2
|
1839 |
|
alpar@3
|
1840 |
The experiments show that VF2++ consistently outperforms VF2 Plus on
|
alpar@3
|
1841 |
biological graphs. It seems to be asymptotically faster on protein and
|
alpar@3
|
1842 |
on contact map graphs in the case of induced subgraph isomorphism,
|
alpar@3
|
1843 |
while in the case of graph isomorphism, it has definitely better
|
alpar@3
|
1844 |
asymptotic behaviour on protein graphs.
|
alpar@2
|
1845 |
|
alpar@3
|
1846 |
Regarding random sparse graphs, not only has VF2++ proved itself to be
|
alpar@3
|
1847 |
faster than VF2 Plus, but it has a practically linear behaviour both
|
alpar@3
|
1848 |
in the case of induced subgraph- and graph isomorphism, as well.
|
alpar@2
|
1849 |
|
alpar@2
|
1850 |
|
alpar@0
|
1851 |
|
alpar@0
|
1852 |
%% The Appendices part is started with the command \appendix;
|
alpar@0
|
1853 |
%% appendix sections are then done as normal sections
|
alpar@0
|
1854 |
%% \appendix
|
alpar@0
|
1855 |
|
alpar@0
|
1856 |
%% \section{}
|
alpar@0
|
1857 |
%% \label{}
|
alpar@0
|
1858 |
|
alpar@0
|
1859 |
%% If you have bibdatabase file and want bibtex to generate the
|
alpar@0
|
1860 |
%% bibitems, please use
|
alpar@0
|
1861 |
%%
|
alpar@3
|
1862 |
\bibliographystyle{elsarticle-num} \bibliography{bibliography}
|
alpar@0
|
1863 |
|
alpar@0
|
1864 |
%% else use the following coding to input the bibitems directly in the
|
alpar@0
|
1865 |
%% TeX file.
|
alpar@0
|
1866 |
|
alpar@2
|
1867 |
%% \begin{thebibliography}{00}
|
alpar@0
|
1868 |
|
alpar@2
|
1869 |
%% %% \bibitem{label}
|
alpar@2
|
1870 |
%% %% Text of bibliographic item
|
alpar@0
|
1871 |
|
alpar@2
|
1872 |
%% \bibitem{}
|
alpar@0
|
1873 |
|
alpar@2
|
1874 |
%% \end{thebibliography}
|
alpar@2
|
1875 |
|
alpar@0
|
1876 |
\end{document}
|
alpar@0
|
1877 |
\endinput
|
alpar@0
|
1878 |
%%
|
alpar@0
|
1879 |
%% End of file `elsarticle-template-num.tex'.
|