alpar@0
|
1 |
%%
|
alpar@0
|
2 |
%% Copyright 2007, 2008, 2009 Elsevier Ltd
|
alpar@0
|
3 |
%%
|
alpar@0
|
4 |
%% This file is part of the 'Elsarticle Bundle'.
|
alpar@0
|
5 |
%% ---------------------------------------------
|
alpar@0
|
6 |
%%
|
alpar@0
|
7 |
%% It may be distributed under the conditions of the LaTeX Project Public
|
alpar@0
|
8 |
%% License, either version 1.2 of this license or (at your option) any
|
alpar@0
|
9 |
%% later version. The latest version of this license is in
|
alpar@0
|
10 |
%% http://www.latex-project.org/lppl.txt
|
alpar@0
|
11 |
%% and version 1.2 or later is part of all distributions of LaTeX
|
alpar@0
|
12 |
%% version 1999/12/01 or later.
|
alpar@0
|
13 |
%%
|
alpar@0
|
14 |
%% The list of all files belonging to the 'Elsarticle Bundle' is
|
alpar@0
|
15 |
%% given in the file `manifest.txt'.
|
alpar@0
|
16 |
%%
|
alpar@0
|
17 |
|
alpar@0
|
18 |
%% Template article for Elsevier's document class `elsarticle'
|
alpar@0
|
19 |
%% with numbered style bibliographic references
|
alpar@0
|
20 |
%% SP 2008/03/01
|
alpar@0
|
21 |
|
alpar@0
|
22 |
\documentclass[preprint,12pt]{elsarticle}
|
alpar@0
|
23 |
|
alpar@0
|
24 |
%% Use the option review to obtain double line spacing
|
alpar@0
|
25 |
%% \documentclass[authoryear,preprint,review,12pt]{elsarticle}
|
alpar@0
|
26 |
|
alpar@0
|
27 |
%% Use the options 1p,twocolumn; 3p; 3p,twocolumn; 5p; or 5p,twocolumn
|
alpar@0
|
28 |
%% for a journal layout:
|
alpar@0
|
29 |
%% \documentclass[final,1p,times]{elsarticle}
|
alpar@0
|
30 |
%% \documentclass[final,1p,times,twocolumn]{elsarticle}
|
alpar@0
|
31 |
%% \documentclass[final,3p,times]{elsarticle}
|
alpar@0
|
32 |
%% \documentclass[final,3p,times,twocolumn]{elsarticle}
|
alpar@0
|
33 |
%% \documentclass[final,5p,times]{elsarticle}
|
alpar@0
|
34 |
%% \documentclass[final,5p,times,twocolumn]{elsarticle}
|
alpar@0
|
35 |
|
alpar@0
|
36 |
%% For including figures, graphicx.sty has been loaded in
|
alpar@0
|
37 |
%% elsarticle.cls. If you prefer to use the old commands
|
alpar@0
|
38 |
%% please give \usepackage{epsfig}
|
alpar@0
|
39 |
|
alpar@0
|
40 |
%% The amssymb package provides various useful mathematical symbols
|
alpar@0
|
41 |
\usepackage{amssymb}
|
alpar@0
|
42 |
%% The amsthm package provides extended theorem environments
|
alpar@0
|
43 |
%% \usepackage{amsthm}
|
alpar@0
|
44 |
|
alpar@0
|
45 |
%% The lineno packages adds line numbers. Start line numbering with
|
alpar@0
|
46 |
%% \begin{linenumbers}, end it with \end{linenumbers}. Or switch it on
|
alpar@0
|
47 |
%% for the whole article with \linenumbers.
|
alpar@0
|
48 |
%% \usepackage{lineno}
|
alpar@0
|
49 |
|
alpar@2
|
50 |
\usepackage{amsmath}
|
alpar@2
|
51 |
%% \usepackage[pdftex]{graphicx}
|
alpar@2
|
52 |
|
alpar@2
|
53 |
\usepackage{pgfplots}
|
alpar@2
|
54 |
\pgfplotsset{width=9cm}
|
alpar@2
|
55 |
\pgfplotsset{compat=1.8}
|
alpar@2
|
56 |
|
alpar@2
|
57 |
\usepackage{caption}
|
alpar@2
|
58 |
\usepackage{subcaption}
|
alpar@2
|
59 |
|
alpar@2
|
60 |
\usepackage{algorithm}
|
alpar@2
|
61 |
\usepackage{algpseudocode}
|
alpar@2
|
62 |
\usepackage{tikz}
|
alpar@2
|
63 |
|
alpar@2
|
64 |
\usepackage{amsthm,amssymb}
|
alpar@2
|
65 |
\renewcommand{\qedsymbol}{\rule{0.7em}{0.7em}}
|
alpar@2
|
66 |
|
alpar@2
|
67 |
\newtheorem{theorem}{Theorem}[subsection]
|
alpar@2
|
68 |
\newtheorem{corollary}{Corollary}[theorem]
|
alpar@2
|
69 |
\newtheorem{claim}[theorem]{Claim}
|
alpar@2
|
70 |
|
alpar@2
|
71 |
\newtheorem{definition}{Definition}[subsection]
|
alpar@2
|
72 |
\newtheorem{notation}{Notation}[subsection]
|
alpar@2
|
73 |
\newtheorem{example}{Example}[subsection]
|
alpar@2
|
74 |
\usetikzlibrary{decorations.markings}
|
alpar@2
|
75 |
\let\oldproofname=\proofname
|
alpar@2
|
76 |
%% \renewcommand{\proofname}{\rm\bf{Proof:}}
|
alpar@2
|
77 |
|
Madarasi@7
|
78 |
\captionsetup{font=normalsize}
|
Madarasi@7
|
79 |
|
alpar@1
|
80 |
\journal{Discrete Applied Mathematics}
|
alpar@0
|
81 |
|
alpar@0
|
82 |
\begin{document}
|
alpar@0
|
83 |
|
alpar@0
|
84 |
\begin{frontmatter}
|
alpar@0
|
85 |
|
alpar@0
|
86 |
%% Title, authors and addresses
|
alpar@0
|
87 |
|
alpar@0
|
88 |
%% use the tnoteref command within \title for footnotes;
|
alpar@0
|
89 |
%% use the tnotetext command for theassociated footnote;
|
alpar@0
|
90 |
%% use the fnref command within \author or \address for footnotes;
|
alpar@0
|
91 |
%% use the fntext command for theassociated footnote;
|
alpar@0
|
92 |
%% use the corref command within \author for corresponding author footnotes;
|
alpar@0
|
93 |
%% use the cortext command for theassociated footnote;
|
alpar@0
|
94 |
%% use the ead command for the email address,
|
alpar@0
|
95 |
%% and the form \ead[url] for the home page:
|
alpar@0
|
96 |
%% \title{Title\tnoteref{label1}}
|
alpar@0
|
97 |
%% \tnotetext[label1]{}
|
alpar@0
|
98 |
%% \author{Name\corref{cor1}\fnref{label2}}
|
alpar@0
|
99 |
%% \ead{email address}
|
alpar@0
|
100 |
%% \ead[url]{home page}
|
alpar@0
|
101 |
%% \fntext[label2]{}
|
alpar@0
|
102 |
%% \cortext[cor1]{}
|
alpar@0
|
103 |
%% \address{Address\fnref{label3}}
|
alpar@0
|
104 |
%% \fntext[label3]{}
|
alpar@0
|
105 |
|
alpar@24
|
106 |
\title{VF2++ --- An Improved Subgraph Isomorphism Algorithm}
|
alpar@0
|
107 |
|
alpar@0
|
108 |
%% use optional labels to link authors explicitly to addresses:
|
alpar@0
|
109 |
%% \author[label1,label2]{}
|
alpar@0
|
110 |
%% \address[label1]{}
|
alpar@0
|
111 |
%% \address[label2]{}
|
alpar@0
|
112 |
|
alpar@24
|
113 |
\author[egres,elte]{Alp{\'a}r J{\"u}ttner}
|
alpar@24
|
114 |
\ead{alpar@cs.elte.hu}
|
alpar@24
|
115 |
\author[elte]{P{\'e}ter Madarasi}
|
alpar@24
|
116 |
\ead{madarasip@caesar.elte.hu}
|
alpar@24
|
117 |
\address[egres]{MTA-ELTE Egerv{\'a}ry Research Group, Budapest, Hungary.}
|
alpar@24
|
118 |
\address[elte]{Department of Operations Research, E{\"o}tv{\"o}s Lor{\'a}nd University, Budapest, Hungary.}
|
alpar@0
|
119 |
|
alpar@0
|
120 |
\begin{abstract}
|
alpar@0
|
121 |
|
alpar@24
|
122 |
This paper presents a largely improved version of the VF2 algorithm
|
alpar@24
|
123 |
for the \emph{Subgraph Isomorphism Problem}. The improvements are
|
alpar@24
|
124 |
twofold. Firstly, it is based on a new approach for determining the
|
alpar@29
|
125 |
matching order of the nodes, and secondly, more efficient ---
|
alpar@29
|
126 |
nevertheless easier to compute --- cutting rules are proposed. They
|
alpar@29
|
127 |
together reduce the search space significantly.
|
alpar@1
|
128 |
|
Madarasi@28
|
129 |
In addition to the usual \emph{Subgraph Isomorphism Problem}, the paper also
|
Madarasi@28
|
130 |
presents specialized algorithms for the \emph{Induced Subgraph
|
alpar@24
|
131 |
Isomorphism} and for the \emph{Graph Isomorphism Problems}.
|
alpar@1
|
132 |
|
alpar@24
|
133 |
Finally, an extensive experimental evaluation is provided using a
|
Madarasi@28
|
134 |
wide range of inputs, including both real-life biological and
|
alpar@24
|
135 |
chemical datasets and standard randomly generated graph series. The
|
alpar@24
|
136 |
results show major and consistent running time improvements over the
|
alpar@24
|
137 |
other known methods.
|
alpar@24
|
138 |
|
Madarasi@28
|
139 |
The C++ implementations of the algorithms are available open-source as
|
Madarasi@28
|
140 |
part of the LEMON graph and network optimization library.
|
alpar@24
|
141 |
|
alpar@0
|
142 |
\end{abstract}
|
alpar@0
|
143 |
|
alpar@0
|
144 |
\begin{keyword}
|
alpar@24
|
145 |
Computational Biology, Subgraph Isomorphism Problem
|
alpar@0
|
146 |
%% keywords here, in the form: keyword \sep keyword
|
alpar@0
|
147 |
|
alpar@0
|
148 |
%% PACS codes here, in the form: \PACS code \sep code
|
alpar@0
|
149 |
|
alpar@0
|
150 |
%% MSC codes here, in the form: \MSC code \sep code
|
alpar@0
|
151 |
%% or \MSC[2008] code \sep code (2000 is the default)
|
alpar@0
|
152 |
|
alpar@0
|
153 |
\end{keyword}
|
alpar@0
|
154 |
|
alpar@0
|
155 |
\end{frontmatter}
|
alpar@0
|
156 |
|
alpar@0
|
157 |
%% \linenumbers
|
alpar@0
|
158 |
|
alpar@0
|
159 |
%% main text
|
alpar@2
|
160 |
\section{Introduction}
|
alpar@2
|
161 |
\label{sec:intro}
|
alpar@2
|
162 |
|
alpar@3
|
163 |
In the last decades, combinatorial structures, and especially graphs
|
alpar@3
|
164 |
have been considered with ever increasing interest, and applied to the
|
alpar@29
|
165 |
solution of several new and revised questions. The expressiveness,
|
alpar@29
|
166 |
the simplicity and the deep theoretical background
|
alpar@29
|
167 |
of graphs make it one of the most useful
|
alpar@29
|
168 |
modeling tool and appears constantly in several seemingly independent
|
Madarasi@19
|
169 |
fields, such as bioinformatics and chemistry.
|
alpar@2
|
170 |
|
alpar@3
|
171 |
Complex biological systems arise from the interaction and cooperation
|
alpar@29
|
172 |
of plenty of molecular components. Getting acquainted with the
|
alpar@29
|
173 |
structure of such systems at the molecular level is of primary
|
alpar@29
|
174 |
importance, since protein-protein interaction, DNA-protein
|
alpar@29
|
175 |
interaction, metabolic interaction, transcription factor binding,
|
alpar@29
|
176 |
neuronal networks, and hormone singling networks can be understood
|
alpar@29
|
177 |
this way.
|
alpar@2
|
178 |
|
alpar@29
|
179 |
Many chemical and biological structures can easily be modeled
|
Madarasi@19
|
180 |
as graphs, for instance, a molecular structure can be
|
Madarasi@19
|
181 |
considered as a graph, whose nodes correspond to atoms and whose
|
Madarasi@19
|
182 |
edges to chemical bonds. The similarity and dissimilarity of
|
Madarasi@19
|
183 |
objects corresponding to nodes are incorporated to the model
|
Madarasi@19
|
184 |
by \emph{node labels}. Understanding such networks basically
|
Madarasi@26
|
185 |
requires finding specific subgraphs, thus it calls for efficient
|
Madarasi@19
|
186 |
graph matching algorithms.
|
alpar@2
|
187 |
|
Madarasi@19
|
188 |
Other real-world fields related to some
|
Madarasi@19
|
189 |
variants of graph matching include pattern recognition
|
Madarasi@28
|
190 |
and machine vision~\cite{HorstBunkeApplications}, symbol recognition~\cite{CordellaVentoSymbolRecognition}, and face identification~\cite{JianzhuangYongFaceIdentification}. \\
|
alpar@2
|
191 |
|
alpar@3
|
192 |
Subgraph and induced subgraph matching problems are known to be
|
Madarasi@28
|
193 |
NP-Complete~\cite{SubgraphNPC}, while the graph isomorphism problem is
|
alpar@3
|
194 |
one of the few problems in NP neither known to be in P nor
|
Madarasi@28
|
195 |
NP-Complete. Although polynomial-time isomorphism algorithms are known
|
alpar@3
|
196 |
for various graph classes, like trees and planar
|
Madarasi@28
|
197 |
graphs~\cite{PlanarGraphIso}, bounded valence
|
Madarasi@28
|
198 |
graphs~\cite{BondedDegGraphIso}, interval graphs~\cite{IntervalGraphIso}
|
alpar@29
|
199 |
or permutation graphs~\cite{PermGraphIso}. Furthermore, an FPT algorithm has also been presented for the colored hypergraph isomorphism problem in~\cite{ColoredHiperGraphIso}.
|
alpar@2
|
200 |
|
alpar@29
|
201 |
In the following, some algorithms which do not need any restrictions on the graphs are
|
alpar@29
|
202 |
summarized. Even though,
|
alpar@29
|
203 |
an overall polynomial behavior is not expectable from such an
|
alpar@29
|
204 |
alternative, they may often have good practical performance, in fact,
|
alpar@29
|
205 |
they might be the best choice in practice even on a graph class for which polynomial
|
Madarasi@19
|
206 |
algorithm is known.
|
alpar@2
|
207 |
|
alpar@3
|
208 |
The first practically usable approach was due to
|
Madarasi@28
|
209 |
\emph{Ullmann}~\cite{Ullmann}, which is a commonly used algorithm based on depth-first
|
Madarasi@28
|
210 |
search with a complex heuristic for reducing the
|
alpar@3
|
211 |
number of visited states. A major problem is its $\Theta(n^3)$ space
|
alpar@3
|
212 |
complexity, which makes it impractical in the case of big sparse
|
alpar@3
|
213 |
graphs.
|
alpar@2
|
214 |
|
Madarasi@28
|
215 |
In a recent paper, Ullmann~\cite{UllmannBit} presents an
|
alpar@3
|
216 |
improved version of this algorithm based on a bit-vector solution for
|
alpar@3
|
217 |
the binary Constraint Satisfaction Problem.
|
alpar@2
|
218 |
|
Madarasi@28
|
219 |
The \emph{Nauty} algorithm~\cite{Nauty} transforms the two graphs to
|
alpar@29
|
220 |
a canonical form before starting to look for an isomorphism. It has
|
alpar@3
|
221 |
been considered as one of the fastest graph isomorphism algorithms,
|
alpar@3
|
222 |
although graph categories were shown in which it takes exponentially
|
alpar@3
|
223 |
many steps. This algorithm handles only the graph isomorphism problem.
|
alpar@2
|
224 |
|
Madarasi@28
|
225 |
The \emph{LAD} algorithm~\cite{Lad} uses a depth-first search
|
alpar@3
|
226 |
strategy and formulates the matching as a Constraint Satisfaction
|
alpar@3
|
227 |
Problem to prune the search tree. The constraints are that the mapping
|
alpar@29
|
228 |
has to be invective and edge-preserving, hence it is possible to
|
alpar@3
|
229 |
handle new matching types as well.
|
alpar@2
|
230 |
|
Madarasi@28
|
231 |
The \emph{RI} algorithm~\cite{RI} and its variations are based on a
|
alpar@3
|
232 |
state space representation. After reordering the nodes of the graphs,
|
alpar@3
|
233 |
it uses some fast executable heuristic checks without using any
|
alpar@3
|
234 |
complex pruning rules. It seems to run really efficiently on graphs
|
alpar@3
|
235 |
coming from biology, and won the International Contest on Pattern
|
Madarasi@28
|
236 |
Search in Biological Databases~\cite{Content}.
|
alpar@2
|
237 |
|
Madarasi@28
|
238 |
Currently, the most commonly used algorithm is the
|
alpar@29
|
239 |
\emph{VF2}~\cite{VF2}, an improved version of \emph{VF}~\cite{VF}, which was
|
alpar@3
|
240 |
designed for solving pattern matching and computer vision problems,
|
alpar@3
|
241 |
and has been one of the best overall algorithms for more than a
|
Madarasi@28
|
242 |
decade. Although, it is not as fast as some of the new specialized algorithms, it is still widely used due to its simplicity and space efficiency. VF2 uses
|
alpar@29
|
243 |
a state space representation and checks specific conditions in each state
|
alpar@3
|
244 |
to prune the search tree.
|
alpar@2
|
245 |
|
Madarasi@28
|
246 |
Meanwhile, another variant called \emph{VF2~Plus}~\cite{VF2Plus} has
|
alpar@3
|
247 |
been published. It is considered to be as efficient as the RI
|
alpar@29
|
248 |
algorithm and has a strictly better behavior on large graphs. The
|
alpar@29
|
249 |
main idea of VF2~Plus is to precompute a heuristic node order of the graph to be embedded, on which VF2 works more efficiently.
|
alpar@2
|
250 |
|
Madarasi@19
|
251 |
This paper introduces \emph{VF2++}, a new further improved algorithm
|
alpar@29
|
252 |
for the graph and (induced) subgraph isomorphism problems. It is based on
|
Madarasi@19
|
253 |
efficient cutting rules and determines a node order in which VF2 runs
|
Madarasi@19
|
254 |
significantly faster on practical inputs.
|
Madarasi@19
|
255 |
|
alpar@25
|
256 |
The rest of the paper is structured as
|
alpar@25
|
257 |
follows. Section~\ref{sec:ProbStat} defines the exact problems to be
|
alpar@25
|
258 |
solved, Section~\ref{sec:VF2Alg} provides a description of VF2. Based
|
alpar@25
|
259 |
on that, Section~\ref{sec:VF2ppAlg} introduces VF2++. Some technical
|
alpar@25
|
260 |
details necessary for an efficient implementation are discussed in
|
alpar@29
|
261 |
Section~\ref{sec:VF2ppImpl}. Finally, Section~\ref{sec:ExpRes}
|
Madarasi@28
|
262 |
provides a detailed experimental evaluation of VF2++ and its comparison
|
alpar@27
|
263 |
to the state-of-the-art algorithm.
|
Madarasi@26
|
264 |
|
alpar@25
|
265 |
It must also be mentioned that the C++ implementations of the
|
alpar@25
|
266 |
algorithms have been made available for evaluation and use under an
|
alpar@29
|
267 |
open-source license as a part of
|
alpar@29
|
268 |
LEMON~\cite{o11:_lemon_open_sourc_c_graph_templ_librar} graph library.
|
Madarasi@22
|
269 |
|
Madarasi@22
|
270 |
\section{Problem Statement}\label{sec:ProbStat}
|
Madarasi@19
|
271 |
This section provides a formal description of the problems to be
|
alpar@3
|
272 |
solved.
|
alpar@2
|
273 |
\subsection{Definitions}
|
alpar@2
|
274 |
|
Madarasi@19
|
275 |
Throughout the paper $G_{1}=(V_{1}, E_{1})$ and
|
Madarasi@19
|
276 |
$G_{2}=(V_{2}, E_{2})$ denote two undirected graphs.
|
Madarasi@19
|
277 |
|
Madarasi@19
|
278 |
\begin{definition}
|
Madarasi@19
|
279 |
$\mathcal{L}: (V_{1}\cup V_{2}) \longrightarrow K$ is a \textbf{node
|
Madarasi@19
|
280 |
label function}, where K is an arbitrary set. The elements in K
|
Madarasi@19
|
281 |
are the \textbf{node labels}. Two nodes, u and v are said to be
|
Madarasi@19
|
282 |
\textbf{equivalent} if $\mathcal{L}(u)=\mathcal{L}(v)$.
|
Madarasi@19
|
283 |
\end{definition}
|
Madarasi@19
|
284 |
|
alpar@29
|
285 |
For the sake of simplicity, the graph, subgraph and induced subgraph
|
alpar@29
|
286 |
isomorphisms are defined in a more general way.
|
Madarasi@19
|
287 |
|
alpar@2
|
288 |
\begin{definition}\label{sec:ismorphic}
|
Madarasi@19
|
289 |
$G_{1}$ and $G_{2}$ are \textbf{isomorphic} (by the node label $\mathcal{L}$) if $\exists \mathfrak{m}:
|
Madarasi@19
|
290 |
V_{1} \longrightarrow V_{2}$ bijection, for which the
|
alpar@3
|
291 |
following is true:
|
alpar@2
|
292 |
\begin{center}
|
Madarasi@20
|
293 |
$\forall u\in{V_{1}} : \mathcal{L}(u)=\mathcal{L}(\mathfrak{m}(u))$ and\\
|
Madarasi@19
|
294 |
$\forall u,v\in{V_{1}} : (u,v)\in{E_{1}} \Leftrightarrow (\mathfrak{m}(u),\mathfrak{m}(v))\in{E_{2}}$
|
alpar@2
|
295 |
\end{center}
|
alpar@2
|
296 |
\end{definition}
|
Madarasi@19
|
297 |
|
alpar@2
|
298 |
\begin{definition}
|
Madarasi@19
|
299 |
$G_{1}$ is a \textbf{subgraph} of $G_{2}$ (by the node label $\mathcal{L}$) if $\exists \mathfrak{m}:
|
Madarasi@19
|
300 |
V_{1}\longrightarrow V_{2}$ injection, for which the
|
alpar@3
|
301 |
following is true:
|
alpar@2
|
302 |
\begin{center}
|
Madarasi@20
|
303 |
$\forall u\in{V_{1}} : \mathcal{L}(u)=\mathcal{L}(\mathfrak{m}(u))$ and\\
|
Madarasi@19
|
304 |
$\forall u,v \in{V_{1}} : (u,v)\in{E_{1}} \Rightarrow (\mathfrak{m}(u),\mathfrak{m}(v))\in E_{2}$
|
alpar@2
|
305 |
\end{center}
|
alpar@2
|
306 |
\end{definition}
|
alpar@2
|
307 |
|
alpar@2
|
308 |
\begin{definition}
|
Madarasi@19
|
309 |
$G_{1}$ is an \textbf{induced subgraph} of $G_{2}$ (by the node label $\mathcal{L}$) if $\exists
|
Madarasi@19
|
310 |
\mathfrak{m}: V_{1}\longrightarrow V_{2}$ injection, for which the
|
alpar@3
|
311 |
following is true:
|
alpar@2
|
312 |
\begin{center}
|
Madarasi@19
|
313 |
$\forall u\in{V_{1}} : \mathcal{L}(u)=\mathcal{L}(\mathfrak{m}(u))$ and
|
Madarasi@19
|
314 |
|
Madarasi@19
|
315 |
$\forall u,v \in{V_{1}} : (u,v)\in{E_{1}} \Leftrightarrow
|
Madarasi@19
|
316 |
(\mathfrak{m}(u),\mathfrak{m}(v))\in E_{2}$
|
alpar@2
|
317 |
\end{center}
|
alpar@2
|
318 |
\end{definition}
|
alpar@2
|
319 |
|
alpar@2
|
320 |
|
alpar@2
|
321 |
\subsection{Common problems}\label{sec:CommProb}
|
alpar@2
|
322 |
|
Madarasi@28
|
323 |
The focus of this paper is on the following problems appearing in many applications.
|
alpar@2
|
324 |
|
Madarasi@28
|
325 |
The \textbf{subgraph isomorphism problem} is the following: is
|
Madarasi@19
|
326 |
$G_{1}$ isomorphic to any subgraph of $G_{2}$ by a given node
|
alpar@3
|
327 |
label?
|
alpar@2
|
328 |
|
Madarasi@28
|
329 |
The \textbf{induced subgraph isomorphism problem} asks the same about the
|
alpar@3
|
330 |
existence of an induced subgraph.
|
alpar@2
|
331 |
|
alpar@3
|
332 |
The \textbf{graph isomorphism problem} can be defined as induced
|
alpar@3
|
333 |
subgraph matching problem where the sizes of the two graphs are equal.
|
alpar@2
|
334 |
|
alpar@29
|
335 |
In addition, one may either want to find a \textbf{single} embedding or \textbf{enumerate} all of them.
|
alpar@2
|
336 |
|
Madarasi@22
|
337 |
\section{The VF2 Algorithm}\label{sec:VF2Alg}
|
Madarasi@28
|
338 |
This algorithm is the basis of both the VF2++ and the VF2~Plus. VF2
|
Madarasi@28
|
339 |
is able to handle all the variations mentioned in Section~\ref{sec:CommProb}. Although it can also handle directed graphs,
|
alpar@29
|
340 |
for the sake of simplicity, only the undirected case is
|
alpar@3
|
341 |
discussed.
|
alpar@2
|
342 |
|
alpar@2
|
343 |
|
alpar@2
|
344 |
\subsection{Common notations}
|
Madarasi@19
|
345 |
\indent Assume $G_{1}$ is searched in $G_{2}$. The following
|
alpar@29
|
346 |
definitions and notations is used throughout this paper.
|
alpar@2
|
347 |
\begin{definition}
|
Madarasi@19
|
348 |
An injection $\mathfrak{m} : D \longrightarrow V_2$ is called (partial) \textbf{mapping}, where $D\subseteq V_1$.
|
Madarasi@19
|
349 |
\end{definition}
|
Madarasi@19
|
350 |
|
Madarasi@19
|
351 |
\begin{notation}
|
Madarasi@19
|
352 |
$\mathfrak{D}(f)$ and $\mathfrak{R}(f)$ denote the domain and the range of a function $f$, respectively.
|
Madarasi@19
|
353 |
\end{notation}
|
Madarasi@19
|
354 |
|
Madarasi@19
|
355 |
\begin{definition}
|
Madarasi@19
|
356 |
Mapping $\mathfrak{m}$ \textbf{covers} a node $u\in V_1\cup V_2$ if $u\in \mathfrak{D}(\mathfrak{m})\cup \mathfrak{R}(\mathfrak{m})$.
|
alpar@2
|
357 |
\end{definition}
|
alpar@2
|
358 |
|
alpar@2
|
359 |
\begin{definition}
|
Madarasi@19
|
360 |
A mapping $\mathfrak{m}$ is $\mathbf{whole\ mapping}$ if $\mathfrak{m}$ covers all the
|
Madarasi@19
|
361 |
nodes of $V_{1}$, i.e. $\mathfrak{D}(\mathfrak{m})=V_1$.
|
alpar@2
|
362 |
\end{definition}
|
alpar@2
|
363 |
|
alpar@2
|
364 |
\begin{definition}
|
Madarasi@28
|
365 |
Let \textbf{extend}$(\mathfrak{m},(u,v))$ denote the function $f : \mathfrak{D}(\mathfrak{m})\cup\{u\}\longrightarrow\mathfrak{R}(\mathfrak{m})\cup\{v\}$, for which $\forall w\in \mathfrak{D}(\mathfrak{m}) : f(w)=\mathfrak{m}(w)$ and $f(u)=v$ holds, where $u\in V_1\setminus\mathfrak{D}(\mathfrak{m})$ and $v\in V_2\setminus\mathfrak{R}(\mathfrak{m})$; otherwise $extend(\mathfrak{m},(u,v))$ is undefined.
|
alpar@2
|
366 |
\end{definition}
|
alpar@2
|
367 |
|
alpar@2
|
368 |
\begin{notation}
|
Madarasi@19
|
369 |
Throughout the paper, $\mathbf{PT}$ denotes a generic problem type
|
Madarasi@28
|
370 |
which can be substituted by any of the $\mathbf{SUB}$, $\mathbf{IND}$
|
alpar@29
|
371 |
and $\mathbf{ISO}$ problems, which stand for the problems mentioned in Section~\ref{sec:CommProb} respectively.
|
alpar@2
|
372 |
\end{notation}
|
alpar@2
|
373 |
|
alpar@2
|
374 |
\begin{definition}
|
Madarasi@28
|
375 |
Let $\mathfrak{m}$ be a mapping. The \textbf{consistency function for } $\mathbf{PT}$ is a logical function, for which $\mathbf{Cons_{PT}}(\mathfrak{m})$ is true if and only if $\mathfrak{m}$ satisfies the requirements of $\mathbf{PT}$ considering the subgraphs of $G_{1}$ and $G_{2}$ induced by $\mathfrak{D}(\mathfrak{m})$ and $\mathfrak{R}(\mathfrak{m})$, respectively.
|
Madarasi@28
|
376 |
|
Madarasi@28
|
377 |
%$\mathbf{Cons_{PT}}(\mathfrak{m})$ is true if there exists a whole mapping $w$ satisfying the requirements of $PT$, for which $\mathfrak{m}$ is exactly $w$ restricted to $\mathfrak{D}(\mathfrak{m})$.
|
alpar@2
|
378 |
\end{definition}
|
alpar@2
|
379 |
|
alpar@2
|
380 |
\begin{definition}
|
Madarasi@19
|
381 |
Let $\mathfrak{m}$ be a mapping. A logical function $\mathbf{Cut_{PT}}$ is a
|
Madarasi@28
|
382 |
\textbf{cutting function for } $\mathbf{PT}$ if the following
|
Madarasi@19
|
383 |
holds. $\mathbf{Cut_{PT}(\mathfrak{m})}$ is false if there exists a sequence of extend operations, which results in a whole mapping satisfying the requirements of $PT$.
|
alpar@2
|
384 |
\end{definition}
|
alpar@2
|
385 |
|
alpar@2
|
386 |
\begin{definition}
|
Madarasi@28
|
387 |
A mapping $\mathfrak{m}$ is said to be \textbf{consistent mapping by} $\mathbf{PT}$ if
|
Madarasi@19
|
388 |
$Cons_{PT}(\mathfrak{m})$ is true.
|
alpar@2
|
389 |
\end{definition}
|
alpar@2
|
390 |
|
alpar@2
|
391 |
$Cons_{PT}$ and $Cut_{PT}$ will often be used in the following form.
|
alpar@2
|
392 |
\begin{notation}
|
Madarasi@19
|
393 |
Let $\mathbf{Cons_{PT}(p, \mathfrak{m})}:=Cons_{PT}(extend(\mathfrak{m},p))$, and
|
Madarasi@19
|
394 |
$\mathbf{Cut_{PT}(p, \mathfrak{m})}:=Cut_{PT}(extend(\mathfrak{m},p))$, where
|
Madarasi@19
|
395 |
$p\in{V_{1}\backslash\mathfrak{D}(\mathfrak{m}) \!\times\!V_{2}\backslash\mathfrak{R}(\mathfrak{m})}$.
|
alpar@2
|
396 |
\end{notation}
|
alpar@2
|
397 |
|
alpar@3
|
398 |
$Cons_{PT}$ will be used to check the consistency of the already
|
alpar@3
|
399 |
covered nodes, while $Cut_{PT}$ is for looking ahead to recognize if
|
alpar@3
|
400 |
no whole consistent mapping can contain the current mapping.
|
alpar@2
|
401 |
|
alpar@2
|
402 |
\subsection{Overview of the algorithm}
|
alpar@2
|
403 |
|
Madarasi@28
|
404 |
VF2 begins with an empty mapping and gradually extends it with respect to the consistency and cutting functions until a whole mapping is reached.
|
Madarasi@28
|
405 |
|
Madarasi@28
|
406 |
Algorithm~\ref{alg:VF2Pseu} is a high-level description of
|
Madarasi@28
|
407 |
the VF2 algorithm. Each state of the matching process can
|
Madarasi@19
|
408 |
be associated with a mapping $\mathfrak{m}$. The initial state
|
Madarasi@19
|
409 |
is associated with a mapping $\mathfrak{m}$, for which
|
Madarasi@19
|
410 |
$\mathfrak{D}(\mathfrak{m})=\emptyset$, i.e. it starts with an empty mapping.
|
alpar@2
|
411 |
|
alpar@2
|
412 |
|
alpar@2
|
413 |
\begin{algorithm}
|
Madarasi@13
|
414 |
\algtext*{EndIf}%ne nyomtasson end if-et
|
Madarasi@13
|
415 |
\algtext*{EndFor}%ne
|
Madarasi@13
|
416 |
\algtext*{EndProcedure}%ne nyomtasson ..
|
alpar@2
|
417 |
\caption{\hspace{0.5cm}$A\ high\ level\ description\ of\ VF2$}\label{alg:VF2Pseu}
|
alpar@2
|
418 |
\begin{algorithmic}[1]
|
alpar@2
|
419 |
|
Madarasi@19
|
420 |
\Procedure{VF2}{Mapping $\mathfrak{m}$, ProblemType $PT$}
|
Madarasi@19
|
421 |
\If{$\mathfrak{m}$ covers
|
Madarasi@19
|
422 |
$V_{1}$} \State Output($\mathfrak{m}$)
|
Madarasi@19
|
423 |
\Else
|
Madarasi@28
|
424 |
\State Compute the set $P_\mathfrak{m}$ of the candidate pairs for extending $\mathfrak{m}$ \ForAll{$p\in{P_\mathfrak{m}}$} \If{Cons$_{PT}$($p,\mathfrak{m}$) $\wedge$
|
Madarasi@19
|
425 |
$\neg$Cut$_{PT}$($p,\mathfrak{m}$)}
|
Madarasi@19
|
426 |
\State \textbf{call}
|
Madarasi@19
|
427 |
VF2($extend(\mathfrak{m},p)$, $PT$) \EndIf \EndFor \EndIf \EndProcedure
|
alpar@2
|
428 |
\end{algorithmic}
|
alpar@2
|
429 |
\end{algorithm}
|
alpar@2
|
430 |
|
alpar@2
|
431 |
|
Madarasi@19
|
432 |
For the current mapping $\mathfrak{m}$, the algorithm computes $P_\mathfrak{m}$, the set of
|
Madarasi@28
|
433 |
candidate node pairs for extending the current mapping $\mathfrak{m}$.
|
alpar@2
|
434 |
|
Madarasi@19
|
435 |
For each pair $p$ in $P_\mathfrak{m}$, $Cons_{PT}(p,\mathfrak{m})$ and
|
Madarasi@19
|
436 |
$Cut_{PT}(p,\mathfrak{m})$ are evaluated. If the former is true and
|
Madarasi@19
|
437 |
the latter is false, the whole process is recursively applied to
|
Madarasi@19
|
438 |
$extend(\mathfrak{m},p)$. Otherwise, $extend(\mathfrak{m},p)$ is not consistent by $PT$, or it
|
Madarasi@19
|
439 |
can be proved that $\mathfrak{m}$ can not be extended to a whole mapping.
|
alpar@2
|
440 |
|
alpar@29
|
441 |
The correctness of the procedure follows from the claim below.
|
alpar@29
|
442 |
|
Madarasi@28
|
443 |
\begin{claim}\label{claim:consMapps}
|
alpar@3
|
444 |
Through consistent mappings, only consistent whole mappings can be
|
Madarasi@19
|
445 |
reached, and all the consistent whole mappings are reachable through
|
alpar@3
|
446 |
consistent mappings.
|
alpar@2
|
447 |
\end{claim}
|
alpar@2
|
448 |
|
Madarasi@19
|
449 |
Note that a mapping may be reached in exponentially many different ways, since the
|
Madarasi@19
|
450 |
order of extensions does not influence the nascent mapping.
|
alpar@2
|
451 |
|
Madarasi@28
|
452 |
However, one may make the following observations.
|
Madarasi@28
|
453 |
|
Madarasi@28
|
454 |
%\begin{claim}
|
Madarasi@28
|
455 |
%\label{claim:claimTotOrd}
|
Madarasi@28
|
456 |
%Let $\prec$ be an arbitrary total ordering relation on $V_{1}$. If
|
Madarasi@28
|
457 |
%the algorithm ignores each $p=(u,v) \in P_\mathfrak{m}$, for which
|
Madarasi@28
|
458 |
%\begin{center}
|
Madarasi@28
|
459 |
%$\exists (\tilde{u},\tilde{v})\in P_\mathfrak{m}: \tilde{u} \prec u$,
|
Madarasi@28
|
460 |
%\end{center}
|
Madarasi@28
|
461 |
%then no mapping can be reached more than once, and each whole mapping %remains reachable.
|
Madarasi@28
|
462 |
%\end{claim}
|
Madarasi@28
|
463 |
|
Madarasi@28
|
464 |
\begin{definition}
|
Madarasi@28
|
465 |
A total order $(u_{\sigma(1)},u_{\sigma(2)},..,u_{\sigma(|V_{1}|)})$ of
|
Madarasi@28
|
466 |
$V_{1}$ is \textbf{matching order} if VF2 can cover $u_{\sigma(d)}$ on the $d$-th level for all $d\in\{1,..,|V_{1}|\}$.
|
Madarasi@28
|
467 |
\end{definition}
|
alpar@2
|
468 |
|
alpar@2
|
469 |
\begin{claim}
|
alpar@2
|
470 |
\label{claim:claimTotOrd}
|
Madarasi@28
|
471 |
If VF2 is prescribed to cover the nodes of $G_1$ according to a matching order, then no mapping can be reached more than once and each whole mapping remains reachable.
|
alpar@2
|
472 |
\end{claim}
|
alpar@2
|
473 |
|
Madarasi@28
|
474 |
Note that the cornerstone of the improvements to VF2 is to choose a proper
|
Madarasi@28
|
475 |
matching order.
|
alpar@2
|
476 |
|
Madarasi@19
|
477 |
\subsection{The candidate set}
|
alpar@2
|
478 |
\label{candidateComputingVF2}
|
Madarasi@19
|
479 |
Let $P_\mathfrak{m}$ be the set of the candidate pairs for inclusion in $\mathfrak{m}$.
|
alpar@2
|
480 |
|
alpar@2
|
481 |
\begin{notation}
|
Madarasi@19
|
482 |
Let $\mathbf{T_{1}(\mathfrak{m})}:=\{u \in V_{1}\backslash\mathfrak{D}(\mathfrak{m}) : \exists \tilde{u}\in{\mathfrak{D}(\mathfrak{m}): (u,\tilde{u})\in E_{1}}\}$, and
|
Madarasi@19
|
483 |
$\mathbf{T_{2}(\mathfrak{m})} := \{v \in V_{2}\backslash\mathfrak{R}(\mathfrak{m}) : \exists\tilde{v}\in{\mathfrak{R}(\mathfrak{m}):(v,\tilde{v})\in E_{2}}\}$.
|
alpar@2
|
484 |
\end{notation}
|
alpar@2
|
485 |
|
Madarasi@28
|
486 |
The set $P_\mathfrak{m}$ contains the pairs of uncovered neighbours of covered
|
Madarasi@17
|
487 |
nodes, and if there is not such a node pair, all the pairs containing
|
alpar@3
|
488 |
two uncovered nodes are added. Formally, let
|
alpar@2
|
489 |
\[
|
Madarasi@19
|
490 |
P_\mathfrak{m}\!=\!
|
alpar@2
|
491 |
\begin{cases}
|
Madarasi@19
|
492 |
T_{1}(\mathfrak{m})\times T_{2}(\mathfrak{m})&\hspace{-0.15cm}\text{if }
|
Madarasi@19
|
493 |
T_{1}(\mathfrak{m})\!\neq\!\emptyset\ \text{and }T_{2}(\mathfrak{m})\!\neq
|
Madarasi@19
|
494 |
\emptyset,\\ (V_{1}\!\setminus\!\mathfrak{D}(\mathfrak{m}))\!\times\!(V_{2}\!\setminus\!\mathfrak{R}(\mathfrak{m}))
|
Madarasi@19
|
495 |
&\hspace{-0.15cm}\text{otherwise}.
|
alpar@2
|
496 |
\end{cases}
|
alpar@2
|
497 |
\]
|
alpar@2
|
498 |
|
alpar@2
|
499 |
\subsection{Consistency}
|
Madarasi@28
|
500 |
Let $p=(u,v)\in V_{1}\times V_{2}$, and suppose $\mathfrak{m}$ is a consistent mapping by
|
Madarasi@19
|
501 |
$PT$. $Cons_{PT}(p,\mathfrak{m})$ checks whether
|
Madarasi@28
|
502 |
adding pair $p$ into $\mathfrak{m}$ leads to a consistent mapping by $PT$.
|
Madarasi@15
|
503 |
|
Madarasi@28
|
504 |
For example, the consistency function of the induced subgraph isomorphism problem is the following.
|
alpar@2
|
505 |
\begin{notation}
|
Madarasi@19
|
506 |
Let $\mathbf{\Gamma_{1} (u)}:=\{\tilde{u}\in V_{1} :
|
Madarasi@19
|
507 |
(u,\tilde{u})\in E_{1}\}$, and $\mathbf{\Gamma_{2}
|
Madarasi@28
|
508 |
(v)}:=\{\tilde{v}\in V_{2} : (v,\tilde{v})\in E_{2}\}$, where $u\in V_{1}$ and $v\in V_{2}$. That is, $\mathbf{\Gamma_{i} (w)}$ denotes the set of neighbours of node $w$ in $G_i$ $(i=1,2)$.
|
alpar@2
|
509 |
\end{notation}
|
alpar@2
|
510 |
|
alpar@2
|
511 |
\begin{claim}
|
Madarasi@28
|
512 |
$extend(\mathfrak{m},(u,v))$ is a consistent mapping by $IND$ if and only if $\mathfrak{m}$ is consistent and $(\forall \tilde{u}\in \mathfrak{D}(\mathfrak{m}): (u,\tilde{u})\in E_{1}
|
Madarasi@28
|
513 |
\Leftrightarrow (v,\mathfrak{m}(\tilde{u}))\in E_{2})$.
|
Madarasi@28
|
514 |
\end{claim}
|
Madarasi@28
|
515 |
|
Madarasi@28
|
516 |
The following formulation gives an efficient way of calculating $Cons_{IND}$.
|
Madarasi@28
|
517 |
|
Madarasi@28
|
518 |
\begin{claim}
|
Madarasi@28
|
519 |
$Cons_{IND}((u,v),\mathfrak{m}):=Cons_{IND}(\mathfrak{m})\wedge\mathcal{L}(u)\!\!=\!\!\mathcal{L}(v)\wedge(\forall \tilde{v}\in \Gamma_{2}(v)\cap\mathfrak{R}(\mathfrak{m}):(u,\mathfrak{m}^{-1}(\tilde{v}))\in E_{1})\wedge
|
Madarasi@19
|
520 |
(\forall \tilde{u}\in \Gamma_{1}(u)
|
Madarasi@28
|
521 |
\cap \mathfrak{D}(\mathfrak{m}):(v,\mathfrak{m}(\tilde{u}))\in E_{2})$ is the consistency function for $IND$.
|
alpar@2
|
522 |
\end{claim}
|
alpar@2
|
523 |
|
alpar@2
|
524 |
\subsection{Cutting rules}
|
Madarasi@19
|
525 |
$Cut_{PT}(p,\mathfrak{m})$ is defined by a collection of efficiently
|
Madarasi@19
|
526 |
verifiable conditions. The requirement is that $Cut_{PT}(p,\mathfrak{m})$ can
|
Madarasi@19
|
527 |
be true only if it is impossible to extend $extend(\mathfrak{m},p)$ to a
|
alpar@3
|
528 |
whole mapping.
|
Madarasi@15
|
529 |
|
Madarasi@28
|
530 |
As an example, a cutting function of induced subgraph isomorphism problem is presented.
|
alpar@2
|
531 |
\begin{notation}
|
Madarasi@19
|
532 |
Let $\mathbf{\tilde{T}_{1}}(\mathfrak{m}):=(V_{1}\backslash
|
Madarasi@19
|
533 |
\mathfrak{D}(\mathfrak{m}))\backslash T_{1}(\mathfrak{m})$, and
|
Madarasi@19
|
534 |
\\ $\mathbf{\tilde{T}_{2}}(\mathfrak{m}):=(V_{2}\backslash
|
Madarasi@19
|
535 |
\mathfrak{R}(\mathfrak{m}))\backslash T_{2}(\mathfrak{m})$.
|
alpar@2
|
536 |
\end{notation}
|
Madarasi@15
|
537 |
|
alpar@2
|
538 |
\begin{claim}
|
Madarasi@19
|
539 |
$Cut_{IND}((u,v),\mathfrak{m}):= |\Gamma_{2} (v)\ \cap\ T_{2}(\mathfrak{m})| <
|
Madarasi@19
|
540 |
|\Gamma_{1} (u)\ \cap\ T_{1}(\mathfrak{m})| \vee |\Gamma_{2}(v)\cap
|
Madarasi@19
|
541 |
\tilde{T}_{2}(\mathfrak{m})| < |\Gamma_{1}(u)\cap
|
Madarasi@28
|
542 |
\tilde{T}_{1}(\mathfrak{m})|$ is a cutting function for $IND$.
|
alpar@2
|
543 |
\end{claim}
|
alpar@2
|
544 |
|
Madarasi@22
|
545 |
\section{The VF2++ Algorithm}\label{sec:VF2ppAlg}
|
Madarasi@28
|
546 |
Although any matching order makes the search space of VF2 a
|
alpar@3
|
547 |
tree, its choice turns out to dramatically influence the number of
|
alpar@3
|
548 |
visited states. The goal is to determine an efficient one as quickly
|
alpar@3
|
549 |
as possible.
|
alpar@2
|
550 |
|
Madarasi@28
|
551 |
The main reason for the superiority of VF2++ over VF2 is twofold. Firstly,
|
alpar@29
|
552 |
taking into account the structure and the node labeling of the graph,
|
Madarasi@28
|
553 |
VF2++ determines a matching order in which most of the unfruitful
|
alpar@3
|
554 |
branches of the search space can be pruned immediately. Secondly,
|
alpar@3
|
555 |
introducing more efficient --- nevertheless still easier to compute
|
alpar@3
|
556 |
--- cutting rules reduces the chance of going astray even further.
|
alpar@2
|
557 |
|
Madarasi@28
|
558 |
In addition to the usual subgraph isomorphism problem, specialized versions
|
alpar@29
|
559 |
for induced subgraph and graph isomorphism problems have also been
|
Madarasi@22
|
560 |
designed.
|
alpar@2
|
561 |
|
Madarasi@28
|
562 |
Note that a weaker version of the cutting rules of VF2++ and an efficient
|
Madarasi@28
|
563 |
candidate set calculation method were described in~\cite{VF2Plus}.
|
alpar@2
|
564 |
|
alpar@3
|
565 |
It should be noted that all the methods described in this section are
|
Madarasi@22
|
566 |
extendable to handle directed graphs and edge labels as well.
|
alpar@3
|
567 |
The basic ideas and the detailed description of VF2++ are provided in
|
Madarasi@22
|
568 |
the following.\newline
|
alpar@2
|
569 |
|
Madarasi@19
|
570 |
The goal is to find a matching order in which the algorithm is able to
|
Madarasi@19
|
571 |
recognize inconsistency or prune the infeasible branches on the
|
Madarasi@19
|
572 |
highest levels and goes deep only if it is needed.
|
Madarasi@19
|
573 |
|
Madarasi@19
|
574 |
\begin{notation}
|
Madarasi@28
|
575 |
Let $\mathbf{Conn_{H}(u)}:=|\Gamma_{1}(u)\cap H|$, that is the
|
Madarasi@19
|
576 |
number of neighbours of u which are in H, where $u\in V_{1} $ and
|
Madarasi@19
|
577 |
$H\subseteq V_{1}$.
|
Madarasi@19
|
578 |
\end{notation}
|
Madarasi@19
|
579 |
|
Madarasi@19
|
580 |
The principal question is the following. Suppose a mapping $\mathfrak{m}$ is
|
Madarasi@19
|
581 |
given. For which node of $T_{1}(\mathfrak{m})$ is the hardest to find a
|
alpar@29
|
582 |
consistent pair in $G_{2}$? The more covered neighbors a node in
|
Madarasi@19
|
583 |
$T_{1}(\mathfrak{m})$ has --- i.e. the largest $Conn_{\mathfrak{D}(\mathfrak{m})}$ it has
|
alpar@29
|
584 |
---, the more rare-to-satisfy consistency constraints for its pair
|
Madarasi@19
|
585 |
are given.
|
Madarasi@19
|
586 |
|
Madarasi@28
|
587 |
Most of the graphs of biological and chemical structures are sparse, thus several nodes in
|
Madarasi@19
|
588 |
$T_{1}(\mathfrak{m})$ may have the same $Conn_{\mathfrak{D}(\mathfrak{m})}$, which makes
|
Madarasi@19
|
589 |
reasonable to define a secondary and a tertiary order between them.
|
Madarasi@19
|
590 |
The observation above proves itself to be as determining, that the
|
alpar@29
|
591 |
secondary ordering prefers nodes with the most uncovered neighbors
|
Madarasi@19
|
592 |
among which have the same $Conn_{\mathfrak{D}(\mathfrak{m})}$ to increase
|
Madarasi@28
|
593 |
$Conn_{\mathfrak{D}(\mathfrak{m})}$ of uncovered nodes as much, as possible. The tertiary ordering prefers nodes having the rarest uncovered labels in $G_2$.
|
Madarasi@19
|
594 |
|
Madarasi@28
|
595 |
Note that the secondary ordering is the same as ordering by degrees,
|
Madarasi@19
|
596 |
which is a static data in front of the above used.
|
Madarasi@19
|
597 |
|
Madarasi@19
|
598 |
These rules can easily result in a matching order which contains the
|
Madarasi@19
|
599 |
nodes of a long path successively, whose nodes may have low $Conn$ and
|
alpar@29
|
600 |
is easily to match into $G_{2}$. To try to avoid that, a
|
alpar@29
|
601 |
Breadth-First-Search order is used, and on each of its levels, the
|
alpar@29
|
602 |
ordering procedure described above is applied.
|
Madarasi@19
|
603 |
\newline
|
Madarasi@19
|
604 |
|
alpar@29
|
605 |
In the following, examples are shown, demonstrating that VF2 may be
|
alpar@29
|
606 |
slow are, even though a matching can be found easily by using a proper
|
Madarasi@19
|
607 |
matching order.
|
Madarasi@19
|
608 |
|
Madarasi@19
|
609 |
\begin{example}
|
Madarasi@19
|
610 |
Suppose $G_{1}$ can be mapped into $G_{2}$ in many ways
|
Madarasi@19
|
611 |
without node labels. Let $u\in V_{1}$ and $v\in V_{2}$.
|
Madarasi@19
|
612 |
\newline
|
Madarasi@19
|
613 |
$\mathcal{L}(u):=black$
|
Madarasi@19
|
614 |
\newline
|
Madarasi@19
|
615 |
$\mathcal{L}(v):=black$
|
Madarasi@19
|
616 |
\newline
|
Madarasi@22
|
617 |
$\mathcal{L}(\tilde{u}):=red \ \forall \tilde{u}\in V_{1}\backslash
|
Madarasi@22
|
618 |
\{u\}$
|
Madarasi@19
|
619 |
\newline
|
Madarasi@22
|
620 |
$\mathcal{L}(\tilde{v}):=red \ \forall \tilde{v}\in V_{2}\backslash
|
Madarasi@22
|
621 |
\{v\}$
|
Madarasi@19
|
622 |
|
Madarasi@19
|
623 |
Now, any mapping by $\mathcal{L}$ must contain $(u,v)$, since
|
Madarasi@19
|
624 |
$u$ is black and no node in $V_{2}$ has a black label except
|
Madarasi@19
|
625 |
$v$. If unfortunately $u$ were the last node which will get covered,
|
Madarasi@19
|
626 |
VF2 would check only in the last steps, whether $u$ can be matched to
|
Madarasi@19
|
627 |
$v$.
|
Madarasi@28
|
628 |
|
alpar@29
|
629 |
However, had $u$ been the first matched node, $u$ would have been
|
alpar@29
|
630 |
matched immediately to $v$, so all the mappings would have been
|
Madarasi@19
|
631 |
precluded in which node labels can not correspond.
|
Madarasi@19
|
632 |
\end{example}
|
Madarasi@19
|
633 |
|
Madarasi@19
|
634 |
\begin{example}
|
Madarasi@28
|
635 |
Suppose there is no node label given, and $G_{1}$ is a small graph that can not be mapped into $G_{2}$ and $u\in V_{1}$.
|
Madarasi@19
|
636 |
\newline
|
Madarasi@19
|
637 |
Let $G'_{1}:=(V_{1}\cup
|
Madarasi@19
|
638 |
\{u'_{1},u'_{2},..,u'_{k}\},E_{1}\cup
|
Madarasi@19
|
639 |
\{(u,u'_{1}),(u'_{1},u'_{2}),..,(u'_{k-1},u'_{k})\})$, that is,
|
Madarasi@19
|
640 |
$G'_{1}$ is $G_{1}\cup \{ a\ k$ long path, which is disjoint
|
Madarasi@19
|
641 |
from $G_{1}$ and one of its starting points is connected to $u\in
|
Madarasi@19
|
642 |
V_{1}\}$.
|
Madarasi@28
|
643 |
|
alpar@29
|
644 |
If, unfortunately, the nodes of the path were the first $k$ nodes in the
|
alpar@29
|
645 |
matching order, the algorithm would iterate through all the possible $k$
|
Madarasi@19
|
646 |
long paths in $G_{2}$, and it would recognize that no path can be
|
Madarasi@19
|
647 |
extended to $G'_{1}$.
|
Madarasi@19
|
648 |
\newline
|
Madarasi@19
|
649 |
However, had it started by the matching of $G_{1}$, it would not
|
Madarasi@19
|
650 |
have matched any nodes of the path.
|
Madarasi@19
|
651 |
\end{example}
|
Madarasi@19
|
652 |
|
Madarasi@19
|
653 |
These examples may look artificial, but the same problems also appear
|
Madarasi@19
|
654 |
in real-world instances, even though in a less obvious way.
|
Madarasi@19
|
655 |
|
Madarasi@28
|
656 |
%\subsection{Preparations}
|
Madarasi@28
|
657 |
%\begin{claim}
|
Madarasi@28
|
658 |
%\label{claim:claimCoverFromLeft}
|
Madarasi@28
|
659 |
%The total ordering relation uniquely determines a node order, in which
|
Madarasi@28
|
660 |
%the nodes of $V_{1}$ will be covered by VF2. From the point of
|
Madarasi@28
|
661 |
%view of the matching procedure, this means, that always the same node
|
Madarasi@28
|
662 |
%of $G_{1}$ will be covered on the $d$-th level.
|
Madarasi@28
|
663 |
%\end{claim}
|
alpar@2
|
664 |
|
Madarasi@28
|
665 |
%\begin{definition}
|
Madarasi@28
|
666 |
%An order $(u_{\sigma(1)},u_{\sigma(2)},..,u_{\sigma(|V_{1}|)})$ of
|
Madarasi@28
|
667 |
%$V_{1}$ is \textbf{matching order} if there exists $\prec$ total
|
Madarasi@28
|
668 |
%ordering relation, s.t. the VF2 with $\prec$ on the d-th level finds
|
Madarasi@28
|
669 |
%pair for $u_{\sigma(d)}$ for all $d\in\{1,..,|V_{1}|\}$.
|
Madarasi@28
|
670 |
%\end{definition}
|
alpar@2
|
671 |
|
Madarasi@28
|
672 |
%\begin{claim}\label{claim:MOclaim}
|
Madarasi@28
|
673 |
%A total ordering is matching order iff the nodes of every component
|
Madarasi@28
|
674 |
%form an interval in the node sequence, and every node connects to a
|
Madarasi@28
|
675 |
%previous node in its component except the first node of each component.
|
Madarasi@28
|
676 |
%\end{claim}
|
alpar@2
|
677 |
|
Madarasi@28
|
678 |
%In summary, a total ordering always uniquely determines a matching
|
Madarasi@28
|
679 |
%order, and every matching order can be determined by a total ordering,
|
Madarasi@28
|
680 |
%however, more than one different total orderings may determine the
|
Madarasi@28
|
681 |
%same matching order.
|
alpar@2
|
682 |
|
Madarasi@28
|
683 |
\subsection{Matching order}
|
alpar@2
|
684 |
\begin{notation}
|
Madarasi@19
|
685 |
Let \textbf{F$_\mathcal{M}$(l)}$:=|\{v\in V_{2} :
|
Madarasi@28
|
686 |
l=\mathcal{L}(v)\}|-|\{u\in \mathcal{M} : l=\mathcal{L}(u)\}|$,
|
Madarasi@19
|
687 |
where $l$ is a label and $\mathcal{M}\subseteq V_{1}$.
|
alpar@2
|
688 |
\end{notation}
|
alpar@2
|
689 |
|
Madarasi@28
|
690 |
\begin{definition}Let $\mathbf{arg\ max}_{f}(S) :=\{u\in S : f(u)=max_{v\in S}\{f(v)\}\}$ and $\mathbf{arg\ min}_{f}(S) := arg\ max_{(-f)}(S)$, where $S$ is a finite set and $f:S\longrightarrow \mathbb{R}$.
|
alpar@2
|
691 |
\end{definition}
|
alpar@2
|
692 |
|
Madarasi@28
|
693 |
\begin{notation}
|
Madarasi@28
|
694 |
Let $deg(v)$ denote the degree of node $v$.
|
Madarasi@28
|
695 |
\end{notation}
|
Madarasi@28
|
696 |
|
Madarasi@28
|
697 |
\begin{algorithm}[H]
|
Madarasi@8
|
698 |
\algtext*{EndIf}
|
Madarasi@8
|
699 |
\algtext*{EndProcedure}
|
alpar@2
|
700 |
\algtext*{EndWhile}
|
Madarasi@13
|
701 |
\algtext*{EndFor}
|
alpar@2
|
702 |
\caption{\hspace{0.5cm}$The\ method\ of\ VF2++\ for\ determining\ the\ node\ order$}\label{alg:VF2PPPseu}
|
alpar@2
|
703 |
\begin{algorithmic}[1]
|
alpar@3
|
704 |
\Procedure{VF2++order}{} \State $\mathcal{M}$ := $\emptyset$
|
Madarasi@19
|
705 |
\Comment{matching order} \While{$V_{1}\backslash \mathcal{M}
|
alpar@3
|
706 |
\neq\emptyset$} \State $r\in$ arg max$_{deg}$ (arg
|
Madarasi@19
|
707 |
min$_{F_\mathcal{M}\circ \mathcal{L}}(V_{1}\backslash
|
alpar@3
|
708 |
\mathcal{M})$)\label{alg:findMin} \State Compute $T$, a BFS tree with
|
alpar@3
|
709 |
root node $r$. \For{$d=0,1,...,depth(T)$} \State $V_d$:=nodes of the
|
Madarasi@28
|
710 |
$d$-th level \State Process $V_d$ \Comment{See Algorithm~\ref{alg:VF2PPProcess1}} \EndFor
|
alpar@3
|
711 |
\EndWhile \EndProcedure
|
alpar@2
|
712 |
\end{algorithmic}
|
alpar@2
|
713 |
\end{algorithm}
|
alpar@2
|
714 |
|
alpar@2
|
715 |
\begin{algorithm}
|
Madarasi@8
|
716 |
\algtext*{EndIf}
|
Madarasi@8
|
717 |
\algtext*{EndProcedure}%ne nyomtasson ..
|
alpar@2
|
718 |
\algtext*{EndWhile}
|
Madarasi@8
|
719 |
\caption{\hspace{.5cm}$The\ method\ for\ processing\ a\ level\ of\ the\ BFS\ tree$}\label{alg:VF2PPProcess1}
|
alpar@2
|
720 |
\begin{algorithmic}[1]
|
Madarasi@17
|
721 |
\Procedure{VF2++ProcessLevel}{$V_{d}$} \While{$V_d\neq\emptyset$}
|
Madarasi@28
|
722 |
\State $m\in$ arg min$_{F_\mathcal{M}\circ\ \mathcal{L}}($ arg max$_{deg}($arg
|
alpar@3
|
723 |
max$_{Conn_{\mathcal{M}}}(V_{d})))$ \State $V_d:=V_d\backslash m$
|
alpar@3
|
724 |
\State Append node $m$ to the end of $\mathcal{M}$ \State Refresh
|
alpar@3
|
725 |
$F_\mathcal{M}$ \EndWhile \EndProcedure
|
alpar@2
|
726 |
\end{algorithmic}
|
alpar@2
|
727 |
\end{algorithm}
|
alpar@2
|
728 |
|
Madarasi@28
|
729 |
Algorithm~\ref{alg:VF2PPPseu} is a high-level description of the
|
alpar@4
|
730 |
matching order procedure of VF2++. It computes a BFS tree for each
|
Madarasi@19
|
731 |
component in ascending order of their rarest node labels and largest $deg$,
|
Madarasi@28
|
732 |
whose root vertex is the minimal node of its component. Algorithm~\ref{alg:VF2PPProcess1} is a method to process a level of the BFS tree, which appends the nodes of the current level in descending
|
Madarasi@8
|
733 |
lexicographic order by $(Conn_{\mathcal{M}},deg,-F_\mathcal{M})$ separately
|
Madarasi@8
|
734 |
to $\mathcal{M}$, and refreshes $F_\mathcal{M}$ immediately.
|
alpar@2
|
735 |
|
Madarasi@28
|
736 |
\begin{claim}
|
Madarasi@28
|
737 |
Algorithm~\ref{alg:VF2PPPseu} provides a matching order.
|
Madarasi@28
|
738 |
\end{claim}
|
alpar@2
|
739 |
|
alpar@2
|
740 |
|
alpar@2
|
741 |
\subsection{Cutting rules}
|
alpar@2
|
742 |
\label{VF2PPCuttingRules}
|
Madarasi@19
|
743 |
This section presents the cutting rules of VF2++, which are improved by using extra information coming from the node labels.
|
alpar@2
|
744 |
\begin{notation}
|
Madarasi@19
|
745 |
Let $\mathbf{\Gamma_{1}^{l}(u)}:=\{\tilde{u} : \mathcal{L}(\tilde{u})=l
|
Madarasi@19
|
746 |
\wedge \tilde{u}\in \Gamma_{1} (u)\}$ and
|
Madarasi@19
|
747 |
$\mathbf{\Gamma_{2}^{l}(v)}:=\{\tilde{v} : \mathcal{L}(\tilde{v})=l \wedge
|
Madarasi@19
|
748 |
\tilde{v}\in \Gamma_{2} (v)\}$, where $u\in V_{1}$, $v\in
|
Madarasi@19
|
749 |
V_{2}$ and $l$ is a label.
|
alpar@2
|
750 |
\end{notation}
|
alpar@2
|
751 |
|
Madarasi@28
|
752 |
\begin{claim}[Cutting function for ISO]
|
Madarasi@28
|
753 |
\[LabCut_{ISO}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})|\!\neq\!|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})|\ \vee\]\[\bigvee_{l\ is\ label} \newline |\Gamma_{2}^{l}(v)\cap \tilde{T}_{2}(\mathfrak{m})| \neq |\Gamma_{1}^{l}(u)\cap \tilde{T}_{1}(\mathfrak{m})|\] is a cutting function for ISO.
|
alpar@2
|
754 |
\end{claim}
|
Madarasi@13
|
755 |
|
Madarasi@28
|
756 |
\begin{claim}[Cutting function for IND]
|
Madarasi@28
|
757 |
\[LabCut_{IND}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})|\!<\!|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})|\ \vee\]\[\bigvee_{l\ is\ label} \newline |\Gamma_{2}^{l}(v)\cap \tilde{T}_{2}(\mathfrak{m})| < |\Gamma_{1}^{l}(u)\cap \tilde{T}_{1}(\mathfrak{m})|\] is a cutting function for IND.
|
Madarasi@28
|
758 |
\end{claim}
|
Madarasi@28
|
759 |
|
Madarasi@28
|
760 |
\begin{claim}[Cutting function for SUB]
|
Madarasi@28
|
761 |
\[LabCut_{SU\!B}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})|\!<\!|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})|\] is a cutting function for SUB.
|
Madarasi@19
|
762 |
\end{claim}
|
alpar@2
|
763 |
|
Madarasi@19
|
764 |
|
Madarasi@19
|
765 |
|
Madarasi@22
|
766 |
\section{Implementation details}\label{sec:VF2ppImpl}
|
alpar@3
|
767 |
This section provides a detailed summary of an efficient
|
alpar@3
|
768 |
implementation of VF2++.
|
Madarasi@28
|
769 |
\begin{notation}
|
Madarasi@28
|
770 |
Let $\Delta_1$ and $\Delta_2$ denote the largest degree in $G_1$ and $G_2$, respectively, and let $\Delta=\max\{\Delta_1,\Delta_2\}$.
|
Madarasi@28
|
771 |
\end{notation}
|
Madarasi@22
|
772 |
\subsection{Storing a mapping}
|
alpar@3
|
773 |
After fixing an arbitrary node order ($u_0, u_1, ..,
|
Madarasi@28
|
774 |
u_{|V_{1}|-1}$) of $G_{1}$, an array $M$ can be used to store
|
alpar@3
|
775 |
the current mapping in the following way.
|
alpar@2
|
776 |
\[
|
alpar@3
|
777 |
M[i] =
|
alpar@2
|
778 |
\begin{cases}
|
Madarasi@19
|
779 |
v & if\ (u_i,v)\ is\ in\ the\ mapping\\ INV\!ALI\!D &
|
Madarasi@17
|
780 |
if\ no\ node\ has\ been\ mapped\ to\ u_i,
|
alpar@2
|
781 |
\end{cases}
|
alpar@2
|
782 |
\]
|
Madarasi@28
|
783 |
where $i\in\{0,1, ..,|V_{1}|-1\}$, $v\in V_{2}$ and $INV\!ALI\!D$
|
alpar@3
|
784 |
means "no node".
|
Madarasi@22
|
785 |
\subsection{Avoiding the recurrence}
|
alpar@4
|
786 |
The recursion of Algorithm~\ref{alg:VF2Pseu} can be realized
|
Madarasi@9
|
787 |
as a \textit{while loop}, which has a loop counter $depth$ denoting the
|
Madarasi@28
|
788 |
current depth of the recursion. Fixing a matching order, let $M$
|
Madarasi@28
|
789 |
denote the array storing the current mapping. Observe that
|
Madarasi@19
|
790 |
$M$ is $INV\!ALI\!D$ from index $depth$+1 and not $INV\!ALI\!D$ before
|
Madarasi@9
|
791 |
$depth$. $M[depth]$ changes
|
alpar@3
|
792 |
while the state is being processed, but the property is held before
|
alpar@3
|
793 |
both stepping back to a predecessor state and exploring a successor
|
alpar@3
|
794 |
state.
|
alpar@2
|
795 |
|
alpar@29
|
796 |
The necessary part of the candidate set is easy to maintain or
|
alpar@29
|
797 |
compute by following
|
Madarasi@28
|
798 |
the steps described in Section~\ref{candidateComputingVF2}. A much faster method
|
Madarasi@28
|
799 |
has been designed for biological and sparse graphs, see the next
|
alpar@3
|
800 |
section for details.
|
alpar@2
|
801 |
|
Madarasi@22
|
802 |
\subsection{Calculating the candidates for a node}
|
Madarasi@28
|
803 |
The task is not to maintain the candidate set, but to generate the
|
Madarasi@19
|
804 |
candidate nodes in $G_{2}$ for a given node $u\in V_{1}$. In
|
Madarasi@20
|
805 |
case of any of the three problem types and a mapping $\mathfrak{m}$, if a node $v\in
|
Madarasi@19
|
806 |
V_{2}$ is a potential pair of $u\in V_{1}$, then $\forall
|
Madarasi@20
|
807 |
u'\in \mathfrak{D}(\mathfrak{m}) : (u,u')\in
|
Madarasi@20
|
808 |
E_{1}\Rightarrow (v,\mathfrak{m}(u'))\in
|
alpar@29
|
809 |
E_{2}$. That is, each covered neighbor of $u$ has to be mapped to
|
alpar@29
|
810 |
a covered neighbor of $v$, i.e. selecting arbitrarily a covered neighbor $u'$ of $u$, all of the admissible candidates for $u$ are among the neighbors of $\mathfrak{m}(u')$.
|
alpar@2
|
811 |
|
Madarasi@28
|
812 |
Having said that, an algorithm running in $\Theta(\Delta_2)$ time is
|
alpar@3
|
813 |
describable if there exists a covered node in the component containing
|
Madarasi@17
|
814 |
$u$, and a linear one otherwise.
|
alpar@2
|
815 |
|
alpar@2
|
816 |
|
Madarasi@22
|
817 |
\subsection{Determining the node order}
|
alpar@3
|
818 |
This section describes how the node order preprocessing method of
|
alpar@3
|
819 |
VF2++ can efficiently be implemented.
|
alpar@2
|
820 |
|
alpar@3
|
821 |
For using lookup tables, the node labels are associated with the
|
alpar@3
|
822 |
numbers $\{0,1,..,|K|-1\}$, where $K$ is the set of the labels. It
|
Madarasi@9
|
823 |
enables $F_\mathcal{M}$ to be stored in an array. At first, the node order
|
alpar@3
|
824 |
$\mathcal{M}=\emptyset$, so $F_\mathcal{M}[i]$ is the number of nodes
|
Madarasi@28
|
825 |
in $V_{2}$ having label $i$, which is easy to compute in
|
Madarasi@28
|
826 |
$\Theta(|V_{2}|)$ steps.
|
alpar@2
|
827 |
|
Madarasi@19
|
828 |
Representing $\mathcal{M}\subseteq V_{1}$ as an array of
|
Madarasi@28
|
829 |
size $|V_{1}|$, both the computation of the BFS tree, and processing its levels by Algorithm~\ref{alg:VF2PPProcess1} can be done in-place by swapping nodes.
|
alpar@2
|
830 |
|
Madarasi@22
|
831 |
\subsection{Cutting rules}
|
alpar@29
|
832 |
Section~\ref{VF2PPCuttingRules} described the cutting rules
|
alpar@29
|
833 |
using the sets $T_{1}$, $T_{2}$, $\tilde T_{1}$
|
alpar@29
|
834 |
and $\tilde T_{2}$, which are dependent on the current mapping. The aim is to check the labeled cutting
|
Madarasi@28
|
835 |
rules of VF2++ in $\Theta(\Delta)$ time.
|
alpar@2
|
836 |
|
alpar@29
|
837 |
Firstly, suppose that these four sets are given a way, that
|
alpar@3
|
838 |
checking whether a node is in a certain set takes constant time,
|
alpar@3
|
839 |
e.g. they are given by their 0-1 characteristic vectors. Let $L$ be an
|
alpar@3
|
840 |
initially zero integer lookup table of size $|K|$. After incrementing
|
Madarasi@19
|
841 |
$L[\mathcal{L}(u')]$ for all $u'\in \Gamma_{1}(u) \cap T_{1}(\mathfrak{m})$ and
|
Madarasi@19
|
842 |
decrementing $L[\mathcal{L}(v')]$ for all $v'\in\Gamma_{2} (v) \cap
|
alpar@29
|
843 |
T_{2}(\mathfrak{m})$, the first part of the cutting rules can be checked in
|
Madarasi@28
|
844 |
$\Theta(\Delta)$ time by considering the proper signs of $L$. Setting $L$
|
Madarasi@28
|
845 |
to zero takes $\Theta(\Delta)$ time again, which makes it possible to use
|
Madarasi@9
|
846 |
the same table through the whole algorithm. The second part of the
|
alpar@3
|
847 |
cutting rules can be verified using the same method with $\tilde
|
Madarasi@19
|
848 |
T_{1}$ and $\tilde T_{2}$ instead of $T_{1}$ and
|
Madarasi@28
|
849 |
$T_{2}$. Thus, the overall time complexity is $\Theta(\Delta)$.
|
alpar@2
|
850 |
|
Madarasi@28
|
851 |
To maintain the sets $T_{1}$, $T_{2}$, $\tilde T_{1}$
|
Madarasi@28
|
852 |
and $\tilde T_{2}$, two other integer lookup tables storing the number of covered neighbours of the nodes of the two graphs can be used. This representation allows constant-time membership checking, furthermore it is maintainable in $\Theta(\Delta)$ time whenever a node pair is added or subtracted by incrementing
|
alpar@3
|
853 |
or decrementing the proper indices. A further improvement is that the
|
Madarasi@19
|
854 |
values of $L[\mathcal{L}(u')]$ in case of checking $u$ are dependent only on
|
Madarasi@28
|
855 |
$u$, i.e. on the current depth of the recursion, so for each $u\in V_{1}$, an array of pairs \textit{(label, number of such labels)} can store $L$. Note that these arrays are at most of size
|
Madarasi@28
|
856 |
$\Delta_1$ if pairs with non-appearing node labels are discarded.
|
alpar@2
|
857 |
|
Madarasi@19
|
858 |
Using similar techniques, the consistency function can be evaluated in
|
Madarasi@28
|
859 |
$\Theta(\Delta)$ steps, as well.
|
alpar@2
|
860 |
|
Madarasi@22
|
861 |
\section{Experimental results}\label{sec:ExpRes}
|
Madarasi@28
|
862 |
This section compares the performance of VF2++ and VF2~Plus. According to
|
Madarasi@19
|
863 |
our experience, both algorithms run faster than VF2 with orders of
|
Madarasi@19
|
864 |
magnitude, thus its inclusion was not reasonable.
|
alpar@2
|
865 |
|
alpar@29
|
866 |
The algorithms were implemented in C++ using the open-source LEMON
|
alpar@29
|
867 |
graph and network optimization
|
alpar@29
|
868 |
library~\cite{LEMON}\cite{o11:_lemon_open_sourc_c_graph_templ_librar}. The
|
alpar@29
|
869 |
tests were carried out on a Linux-based system with an Intel i7 X980
|
alpar@29
|
870 |
3.33 GHz CPU and 6 GB of RAM.
|
alpar@29
|
871 |
|
alpar@2
|
872 |
\subsection{Biological graphs}
|
alpar@3
|
873 |
The tests have been executed on a recent biological dataset created
|
alpar@3
|
874 |
for the International Contest on Pattern Search in Biological
|
Madarasi@28
|
875 |
Databases~\cite{Content}, which has been constructed of molecule,
|
Madarasi@7
|
876 |
protein and contact map graphs extracted from the Protein Data
|
Madarasi@28
|
877 |
Bank~\cite{ProteinDataBank}.
|
alpar@2
|
878 |
|
alpar@3
|
879 |
The molecule dataset contains small graphs with less than 100 nodes
|
alpar@3
|
880 |
and an average degree of less than 3. The protein dataset contains
|
alpar@3
|
881 |
graphs having 500-10 000 nodes and an average degree of 4, while the
|
alpar@3
|
882 |
contact map dataset contains graphs with 150-800 nodes and an average
|
alpar@3
|
883 |
degree of 20. \\
|
alpar@2
|
884 |
|
Madarasi@28
|
885 |
In the following, both the induced subgraph and the graph
|
Madarasi@28
|
886 |
isomorphism problems will be examined.
|
alpar@29
|
887 |
This dataset provides graph pairs, between which all the induced subgraph isomorphisms have to be found. For the running times, please see Figure~\ref{fig:bioIND}.
|
Madarasi@7
|
888 |
|
Madarasi@7
|
889 |
In an other experiment, the nodes of each graph in the database had been
|
Madarasi@7
|
890 |
shuffled, and an isomorphism between the shuffled and the original
|
Madarasi@28
|
891 |
graph was searched. The running times are shown on Figure~\ref{fig:bioISO}.
|
Madarasi@7
|
892 |
|
Madarasi@7
|
893 |
|
Madarasi@17
|
894 |
\begin{figure}[H]
|
Madarasi@17
|
895 |
\vspace*{-2cm}
|
Madarasi@17
|
896 |
\hspace*{-1.5cm}
|
Madarasi@17
|
897 |
\begin{subfigure}[b]{0.55\textwidth}
|
Madarasi@17
|
898 |
\begin{figure}[H]
|
Madarasi@17
|
899 |
\begin{tikzpicture}[trim axis left, trim axis right]
|
Madarasi@28
|
900 |
\begin{axis}[title=Molecules IND,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
|
Madarasi@17
|
901 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
Madarasi@17
|
902 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
903 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
904 |
format/1000 sep = \kern 0.08em}]
|
Madarasi@17
|
905 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
Madarasi@17
|
906 |
\addplot table {Orig/Molecules.32.txt}; \addplot[mark=triangle*,mark
|
Madarasi@17
|
907 |
size=1.8pt,color=red] table {VF2PPLabel/Molecules.32.txt};
|
Madarasi@17
|
908 |
\end{axis}
|
Madarasi@17
|
909 |
\end{tikzpicture}
|
Madarasi@17
|
910 |
\caption{In the case of molecules, the algorithms have
|
Madarasi@17
|
911 |
similar behaviour, but VF2++ is almost two times faster even on such
|
Madarasi@17
|
912 |
small graphs.} \label{fig:INDMolecule}
|
Madarasi@17
|
913 |
\end{figure}
|
Madarasi@17
|
914 |
\end{subfigure}
|
Madarasi@17
|
915 |
\hspace*{1.5cm}
|
Madarasi@17
|
916 |
\begin{subfigure}[b]{0.55\textwidth}
|
Madarasi@17
|
917 |
\begin{figure}[H]
|
Madarasi@17
|
918 |
\begin{tikzpicture}[trim axis left, trim axis right]
|
Madarasi@28
|
919 |
\begin{axis}[title=Contact maps IND,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
|
Madarasi@17
|
920 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
Madarasi@17
|
921 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
922 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
923 |
format/1000 sep = \kern 0.08em}]
|
Madarasi@17
|
924 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
Madarasi@17
|
925 |
\addplot table {Orig/ContactMaps.128.txt};
|
Madarasi@17
|
926 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
Madarasi@17
|
927 |
{VF2PPLabel/ContactMaps.128.txt};
|
Madarasi@17
|
928 |
\end{axis}
|
Madarasi@17
|
929 |
\end{tikzpicture}
|
Madarasi@17
|
930 |
\caption{On contact maps, VF2++ runs almost in constant time, while VF2
|
alpar@29
|
931 |
Plus has a near-linear behavior.} \label{fig:INDContact}
|
Madarasi@17
|
932 |
\end{figure}
|
Madarasi@17
|
933 |
\end{subfigure}
|
Madarasi@17
|
934 |
|
Madarasi@17
|
935 |
\begin{center}
|
Madarasi@17
|
936 |
\vspace*{-0.5cm}
|
Madarasi@17
|
937 |
\begin{subfigure}[b]{0.55\textwidth}
|
Madarasi@17
|
938 |
\begin{figure}[H]
|
Madarasi@17
|
939 |
\begin{tikzpicture}[trim axis left, trim axis right]
|
Madarasi@28
|
940 |
\begin{axis}[title=Proteins IND,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
|
Madarasi@17
|
941 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
Madarasi@17
|
942 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
943 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
944 |
format/1000 sep = \kern 0.08em}] %\addplot+[only marks] table
|
Madarasi@17
|
945 |
{proteinsOrig.txt}; \addplot[mark=*,mark size=1.2pt,color=blue]
|
Madarasi@17
|
946 |
table {Orig/Proteins.256.txt}; \addplot[mark=triangle*,mark
|
Madarasi@17
|
947 |
size=1.8pt,color=red] table {VF2PPLabel/Proteins.256.txt};
|
Madarasi@17
|
948 |
\end{axis}
|
Madarasi@17
|
949 |
\end{tikzpicture}
|
Madarasi@28
|
950 |
\caption{Both of the algorithms have linear behaviour on protein
|
Madarasi@17
|
951 |
graphs. VF2++ is more than 10 times faster than VF2
|
Madarasi@17
|
952 |
Plus.} \label{fig:INDProt}
|
Madarasi@17
|
953 |
\end{figure}
|
Madarasi@17
|
954 |
\end{subfigure}
|
Madarasi@17
|
955 |
\end{center}
|
Madarasi@17
|
956 |
\vspace*{-0.5cm}
|
Madarasi@28
|
957 |
\caption{\normalsize{Induced subgraph isomorphism problem on biological graphs}}\label{fig:bioIND}
|
Madarasi@17
|
958 |
\end{figure}
|
Madarasi@17
|
959 |
|
alpar@2
|
960 |
|
alpar@2
|
961 |
\begin{figure}[H]
|
Madarasi@7
|
962 |
\vspace*{-2cm}
|
Madarasi@7
|
963 |
\hspace*{-1.5cm}
|
Madarasi@7
|
964 |
\begin{subfigure}[b]{0.55\textwidth}
|
Madarasi@7
|
965 |
\begin{figure}[H]
|
Madarasi@7
|
966 |
\begin{tikzpicture}[trim axis left, trim axis right]
|
Madarasi@28
|
967 |
\begin{axis}[title=Molecules ISO,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
|
Madarasi@7
|
968 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
Madarasi@7
|
969 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
970 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
971 |
format/1000 sep = \kern 0.08em}]
|
Madarasi@7
|
972 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
Madarasi@7
|
973 |
\addplot table {Orig/moleculesIso.txt}; \addplot[mark=triangle*,mark
|
Madarasi@7
|
974 |
size=1.8pt,color=red] table {VF2PPLabel/moleculesIso.txt};
|
Madarasi@7
|
975 |
\end{axis}
|
Madarasi@7
|
976 |
\end{tikzpicture}
|
Madarasi@28
|
977 |
\caption{The results are close to each other on contact maps, but VF2++ seems to be slightly faster as the number of nodes increases.
|
Madarasi@28
|
978 |
}\label{fig:ISOMolecule}
|
Madarasi@7
|
979 |
\end{figure}
|
Madarasi@7
|
980 |
\end{subfigure}
|
Madarasi@7
|
981 |
\hspace*{1.5cm}
|
Madarasi@7
|
982 |
\begin{subfigure}[b]{0.55\textwidth}
|
Madarasi@7
|
983 |
\begin{figure}[H]
|
Madarasi@7
|
984 |
\begin{tikzpicture}[trim axis left, trim axis right]
|
Madarasi@28
|
985 |
\begin{axis}[title=Contact maps ISO,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
|
Madarasi@7
|
986 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
Madarasi@7
|
987 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
988 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
989 |
format/1000 sep = \kern 0.08em}]
|
Madarasi@7
|
990 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
Madarasi@7
|
991 |
\addplot table {Orig/contactMapsIso.txt}; \addplot[mark=triangle*,mark
|
Madarasi@7
|
992 |
size=1.8pt,color=red] table {VF2PPLabel/contactMapsIso.txt};
|
Madarasi@7
|
993 |
\end{axis}
|
Madarasi@7
|
994 |
\end{tikzpicture}
|
Madarasi@28
|
995 |
\caption{In the case of molecules, there is no significant
|
Madarasi@28
|
996 |
difference, but VF2++ performs consistently better.}\label{fig:ISOContact}
|
Madarasi@7
|
997 |
\end{figure}
|
Madarasi@7
|
998 |
\end{subfigure}
|
Madarasi@7
|
999 |
|
alpar@2
|
1000 |
\begin{center}
|
Madarasi@7
|
1001 |
\vspace*{-0.5cm}
|
Madarasi@7
|
1002 |
\begin{subfigure}[b]{0.55\textwidth}
|
Madarasi@7
|
1003 |
\begin{figure}[H]
|
Madarasi@7
|
1004 |
\begin{tikzpicture}[trim axis left, trim axis right]
|
Madarasi@28
|
1005 |
\begin{axis}[title=Proteins ISO,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
|
Madarasi@7
|
1006 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
Madarasi@7
|
1007 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
1008 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
1009 |
format/1000 sep = \kern 0.08em}]
|
Madarasi@7
|
1010 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
Madarasi@7
|
1011 |
\addplot table {Orig/proteinsIso.txt}; \addplot[mark=triangle*,mark
|
Madarasi@7
|
1012 |
size=1.8pt,color=red] table {VF2PPLabel/proteinsIso.txt};
|
Madarasi@7
|
1013 |
\end{axis}
|
Madarasi@7
|
1014 |
\end{tikzpicture}
|
Madarasi@28
|
1015 |
\caption{On protein graphs, VF2~Plus has a super linear time
|
Madarasi@7
|
1016 |
complexity, while VF2++ runs in near constant time. The difference
|
Madarasi@28
|
1017 |
is about two orders of magnitude on large graphs.}\label{fig:ISOProt}
|
Madarasi@7
|
1018 |
\end{figure}
|
Madarasi@7
|
1019 |
\end{subfigure}
|
Madarasi@7
|
1020 |
\end{center}
|
Madarasi@7
|
1021 |
\vspace*{-0.6cm}
|
Madarasi@28
|
1022 |
\caption{\normalsize{Graph isomorphism problem on biological graphs}}\label{fig:bioISO}
|
Madarasi@7
|
1023 |
\end{figure}
|
Madarasi@7
|
1024 |
|
Madarasi@7
|
1025 |
|
alpar@2
|
1026 |
|
alpar@2
|
1027 |
|
alpar@2
|
1028 |
\subsection{Random graphs}
|
Madarasi@28
|
1029 |
This section compares VF2++ with VF2~Plus on random graphs of large
|
alpar@3
|
1030 |
size. The node labels are uniformly distributed. Let $\delta$ denote
|
alpar@3
|
1031 |
the average degree. For the parameters of problems solved in the
|
alpar@3
|
1032 |
experiments, please see the top of each chart.
|
Madarasi@28
|
1033 |
\subsubsection{Graph isomorphism problem}
|
alpar@3
|
1034 |
To evaluate the efficiency of the algorithms in the case of graph
|
Madarasi@28
|
1035 |
isomorphism problem, random connected graphs of less than 20 000 nodes have been
|
alpar@3
|
1036 |
considered. Generating a random graph and shuffling its nodes, an
|
alpar@29
|
1037 |
isomorphism had to be found. Figure~\ref{fig:randISO} shows the running times
|
alpar@4
|
1038 |
on graph sets of various density.
|
alpar@2
|
1039 |
|
Madarasi@7
|
1040 |
|
Madarasi@7
|
1041 |
|
Madarasi@7
|
1042 |
|
Madarasi@12
|
1043 |
\begin{figure}
|
Madarasi@7
|
1044 |
\vspace*{-1.5cm}
|
Madarasi@7
|
1045 |
\hspace*{-1.5cm}
|
Madarasi@7
|
1046 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1047 |
\begin{center}
|
alpar@2
|
1048 |
\begin{tikzpicture}
|
Madarasi@28
|
1049 |
\begin{axis}[title={Random ISO, $\delta = 5$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1050 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1051 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
1052 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
1053 |
format/1000 sep = \kern 0.08em}]
|
alpar@2
|
1054 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1055 |
\addplot table {randGraph/iso/vf2pIso5_1.txt};
|
alpar@3
|
1056 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1057 |
{randGraph/iso/vf2ppIso5_1.txt};
|
alpar@2
|
1058 |
\end{axis}
|
alpar@2
|
1059 |
\end{tikzpicture}
|
alpar@2
|
1060 |
\end{center}
|
Madarasi@7
|
1061 |
\end{subfigure}
|
Madarasi@7
|
1062 |
%\hspace{1cm}
|
Madarasi@7
|
1063 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1064 |
\begin{center}
|
alpar@2
|
1065 |
\begin{tikzpicture}
|
Madarasi@28
|
1066 |
\begin{axis}[title={Random ISO, $\delta = 10$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1067 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1068 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
1069 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
1070 |
format/1000 sep = \kern 0.08em}]
|
alpar@2
|
1071 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1072 |
\addplot table {randGraph/iso/vf2pIso10_1.txt};
|
alpar@3
|
1073 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1074 |
{randGraph/iso/vf2ppIso10_1.txt};
|
alpar@2
|
1075 |
\end{axis}
|
alpar@2
|
1076 |
\end{tikzpicture}
|
alpar@2
|
1077 |
\end{center}
|
Madarasi@7
|
1078 |
\end{subfigure}
|
Madarasi@7
|
1079 |
%%\hspace{1cm}
|
Madarasi@7
|
1080 |
\hspace*{-1.5cm}
|
Madarasi@7
|
1081 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1082 |
\begin{center}
|
alpar@2
|
1083 |
\begin{tikzpicture}
|
Madarasi@28
|
1084 |
\begin{axis}[title={Random ISO, $\delta = 15$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1085 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1086 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
1087 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
1088 |
format/1000 sep = \kern 0.08em}]
|
alpar@2
|
1089 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1090 |
\addplot table {randGraph/iso/vf2pIso15_1.txt};
|
alpar@3
|
1091 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1092 |
{randGraph/iso/vf2ppIso15_1.txt};
|
alpar@2
|
1093 |
\end{axis}
|
alpar@2
|
1094 |
\end{tikzpicture}
|
alpar@2
|
1095 |
\end{center}
|
Madarasi@7
|
1096 |
\end{subfigure}
|
Madarasi@7
|
1097 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1098 |
\begin{center}
|
alpar@2
|
1099 |
\begin{tikzpicture}
|
Madarasi@28
|
1100 |
\begin{axis}[title={Random ISO, $\delta = 100$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1101 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1102 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
1103 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
1104 |
format/1000 sep = \kern 0.08em}]
|
alpar@2
|
1105 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1106 |
\addplot table {randGraph/iso/vf2pIso100_1.txt};
|
alpar@3
|
1107 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1108 |
{randGraph/iso/vf2ppIso100_1.txt};
|
alpar@2
|
1109 |
\end{axis}
|
alpar@2
|
1110 |
\end{tikzpicture}
|
Madarasi@23
|
1111 |
\end{center}
|
Madarasi@7
|
1112 |
\end{subfigure}
|
alpar@2
|
1113 |
\vspace*{-0.8cm}
|
Madarasi@28
|
1114 |
\caption{Graph isomorphism problem on random graphs
|
Madarasi@23
|
1115 |
}\label{fig:randISO}
|
alpar@2
|
1116 |
\end{figure}
|
alpar@2
|
1117 |
|
alpar@2
|
1118 |
|
Madarasi@28
|
1119 |
\subsubsection{Induced subgraph isomorphism problem}
|
Madarasi@28
|
1120 |
This section presents a comparison of VF2++ and VF2~Plus in the case
|
Madarasi@28
|
1121 |
of induced subgraph isomorphism problem. In addition to the size of graph $G_2$, that of $G_1$ dramatically influences the hardness of
|
Madarasi@28
|
1122 |
a given problem too, so the overall picture is provided by examining graphs to be embedded of various size.
|
alpar@2
|
1123 |
|
Madarasi@17
|
1124 |
For each chart, a number $0<\rho< 1$ has been fixed, and the following
|
Madarasi@19
|
1125 |
has been executed 150 times. Generating a large graph $G_{2}$ of an average degree of $\delta$,
|
Madarasi@28
|
1126 |
choose 10 of its induced subgraphs having $\rho|V_{2}|$ nodes,
|
Madarasi@28
|
1127 |
and for all the 10 subgraphs find a mapping by using both graph
|
alpar@3
|
1128 |
matching algorithms. The $\delta = 5, 10, 35$ and $\rho = 0.05, 0.1,
|
Madarasi@23
|
1129 |
0.3, 0.8$ cases have been examined, see
|
Madarasi@28
|
1130 |
Figure~\ref{fig:randIND5},~\ref{fig:randIND10}~and~\ref{fig:randIND35}.
|
alpar@2
|
1131 |
|
alpar@2
|
1132 |
|
alpar@2
|
1133 |
|
alpar@2
|
1134 |
|
alpar@2
|
1135 |
|
Madarasi@12
|
1136 |
\begin{figure}
|
Madarasi@7
|
1137 |
\vspace*{-1.5cm}
|
Madarasi@7
|
1138 |
\hspace*{-1.5cm}
|
alpar@2
|
1139 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1140 |
\begin{center}
|
alpar@2
|
1141 |
\begin{tikzpicture}
|
Madarasi@28
|
1142 |
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1143 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1144 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
1145 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
1146 |
format/1000 sep = \kern 0.08em}]
|
alpar@2
|
1147 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1148 |
\addplot table {randGraph/ind/vf2pInd5_0.05.txt};
|
alpar@3
|
1149 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1150 |
{randGraph/ind/vf2ppInd5_0.05.txt};
|
alpar@2
|
1151 |
\end{axis}
|
alpar@2
|
1152 |
\end{tikzpicture}
|
alpar@2
|
1153 |
\end{center}
|
alpar@2
|
1154 |
\end{subfigure}
|
alpar@2
|
1155 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1156 |
\begin{center}
|
alpar@2
|
1157 |
\begin{tikzpicture}
|
Madarasi@28
|
1158 |
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1159 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1160 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
1161 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
1162 |
format/1000 sep = \kern 0.08em}]
|
alpar@2
|
1163 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1164 |
\addplot table {randGraph/ind/vf2pInd5_0.1.txt};
|
alpar@3
|
1165 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1166 |
{randGraph/ind/vf2ppInd5_0.1.txt};
|
alpar@2
|
1167 |
\end{axis}
|
alpar@2
|
1168 |
\end{tikzpicture}
|
alpar@2
|
1169 |
\end{center}
|
alpar@2
|
1170 |
\end{subfigure}
|
Madarasi@7
|
1171 |
\hspace*{-1.5cm}
|
alpar@2
|
1172 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1173 |
\begin{center}
|
alpar@2
|
1174 |
\begin{tikzpicture}
|
Madarasi@28
|
1175 |
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1176 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1177 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
1178 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
1179 |
format/1000 sep = \kern 0.08em}]
|
alpar@2
|
1180 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1181 |
\addplot table {randGraph/ind/vf2pInd5_0.3.txt};
|
alpar@3
|
1182 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1183 |
{randGraph/ind/vf2ppInd5_0.3.txt};
|
alpar@2
|
1184 |
\end{axis}
|
alpar@2
|
1185 |
\end{tikzpicture}
|
alpar@2
|
1186 |
\end{center}
|
alpar@2
|
1187 |
\end{subfigure}
|
alpar@2
|
1188 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1189 |
\begin{center}
|
alpar@2
|
1190 |
\begin{tikzpicture}
|
Madarasi@28
|
1191 |
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1192 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1193 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
1194 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
1195 |
format/1000 sep = \kern 0.08em}]
|
alpar@2
|
1196 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1197 |
\addplot table {randGraph/ind/vf2pInd5_0.8.txt};
|
alpar@3
|
1198 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1199 |
{randGraph/ind/vf2ppInd5_0.8.txt};
|
alpar@2
|
1200 |
\end{axis}
|
alpar@2
|
1201 |
\end{tikzpicture}
|
Madarasi@23
|
1202 |
\end{center}
|
alpar@2
|
1203 |
\end{subfigure}
|
alpar@2
|
1204 |
\vspace*{-0.8cm}
|
Madarasi@28
|
1205 |
\caption{Induced subgraph isomorphism problem on random graphs having an average degree of
|
Madarasi@28
|
1206 |
5}\label{fig:randIND5}
|
alpar@2
|
1207 |
\end{figure}
|
alpar@2
|
1208 |
|
alpar@2
|
1209 |
|
Madarasi@23
|
1210 |
\begin{figure}
|
Madarasi@7
|
1211 |
\vspace*{-1.5cm}
|
Madarasi@7
|
1212 |
\hspace*{-1.5cm}
|
alpar@2
|
1213 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1214 |
\begin{center}
|
Madarasi@7
|
1215 |
\hspace*{-0.5cm}
|
alpar@2
|
1216 |
\begin{tikzpicture}
|
Madarasi@28
|
1217 |
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1218 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1219 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
1220 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
1221 |
format/1000 sep = \kern 0.08em}]
|
alpar@2
|
1222 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1223 |
\addplot table {randGraph/ind/vf2pInd10_0.05.txt};
|
alpar@3
|
1224 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1225 |
{randGraph/ind/vf2ppInd10_0.05.txt};
|
alpar@2
|
1226 |
\end{axis}
|
alpar@2
|
1227 |
\end{tikzpicture}
|
alpar@2
|
1228 |
\end{center}
|
alpar@2
|
1229 |
\end{subfigure}
|
alpar@2
|
1230 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1231 |
\begin{center}
|
Madarasi@7
|
1232 |
\hspace*{-0.5cm}
|
alpar@2
|
1233 |
\begin{tikzpicture}
|
Madarasi@28
|
1234 |
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1235 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1236 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
1237 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
1238 |
format/1000 sep = \kern 0.08em}]
|
alpar@2
|
1239 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1240 |
\addplot table {randGraph/ind/vf2pInd10_0.1.txt};
|
alpar@3
|
1241 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1242 |
{randGraph/ind/vf2ppInd10_0.1.txt};
|
alpar@2
|
1243 |
\end{axis}
|
alpar@2
|
1244 |
\end{tikzpicture}
|
alpar@2
|
1245 |
\end{center}
|
alpar@2
|
1246 |
\end{subfigure}
|
Madarasi@7
|
1247 |
\hspace*{-1.5cm}
|
alpar@2
|
1248 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1249 |
\begin{center}
|
alpar@2
|
1250 |
\begin{tikzpicture}
|
Madarasi@28
|
1251 |
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1252 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1253 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
1254 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
1255 |
format/1000 sep = \kern 0.08em}]
|
alpar@2
|
1256 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1257 |
\addplot table {randGraph/ind/vf2pInd10_0.3.txt};
|
alpar@3
|
1258 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1259 |
{randGraph/ind/vf2ppInd10_0.3.txt};
|
alpar@2
|
1260 |
\end{axis}
|
alpar@2
|
1261 |
\end{tikzpicture}
|
alpar@2
|
1262 |
\end{center}
|
alpar@2
|
1263 |
\end{subfigure}
|
alpar@2
|
1264 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1265 |
\begin{center}
|
alpar@2
|
1266 |
\begin{tikzpicture}
|
Madarasi@28
|
1267 |
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1268 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1269 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
1270 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
1271 |
format/1000 sep = \kern 0.08em}]
|
alpar@2
|
1272 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1273 |
\addplot table {randGraph/ind/vf2pInd10_0.8.txt};
|
alpar@3
|
1274 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1275 |
{randGraph/ind/vf2ppInd10_0.8.txt};
|
alpar@2
|
1276 |
\end{axis}
|
alpar@2
|
1277 |
\end{tikzpicture}
|
Madarasi@23
|
1278 |
\end{center}
|
alpar@2
|
1279 |
\end{subfigure}
|
alpar@2
|
1280 |
\vspace*{-0.8cm}
|
Madarasi@28
|
1281 |
\caption{Induced subgraph isomorphism problem on random graphs having an average degree of
|
Madarasi@28
|
1282 |
10}\label{fig:randIND10}
|
alpar@2
|
1283 |
\end{figure}
|
alpar@2
|
1284 |
|
alpar@2
|
1285 |
|
alpar@2
|
1286 |
|
Madarasi@23
|
1287 |
\begin{figure}
|
Madarasi@7
|
1288 |
\vspace*{-1.5cm}
|
Madarasi@7
|
1289 |
\hspace*{-1.5cm}
|
alpar@2
|
1290 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1291 |
\begin{center}
|
alpar@2
|
1292 |
\begin{tikzpicture}
|
Madarasi@28
|
1293 |
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1294 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1295 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
1296 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
1297 |
format/1000 sep = \kern 0.08em}]
|
alpar@2
|
1298 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1299 |
\addplot table {randGraph/ind/vf2pInd35_0.05.txt};
|
alpar@3
|
1300 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1301 |
{randGraph/ind/vf2ppInd35_0.05.txt};
|
alpar@2
|
1302 |
\end{axis}
|
alpar@2
|
1303 |
\end{tikzpicture}
|
alpar@2
|
1304 |
\end{center}
|
alpar@2
|
1305 |
\end{subfigure}
|
alpar@2
|
1306 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1307 |
\begin{center}
|
alpar@2
|
1308 |
\begin{tikzpicture}
|
Madarasi@28
|
1309 |
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1310 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1311 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
1312 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
1313 |
format/1000 sep = \kern 0.08em}]
|
alpar@2
|
1314 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1315 |
\addplot table {randGraph/ind/vf2pInd35_0.1.txt};
|
alpar@3
|
1316 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1317 |
{randGraph/ind/vf2ppInd35_0.1.txt};
|
alpar@2
|
1318 |
\end{axis}
|
alpar@2
|
1319 |
\end{tikzpicture}
|
alpar@2
|
1320 |
\end{center}
|
alpar@2
|
1321 |
\end{subfigure}
|
Madarasi@7
|
1322 |
\hspace*{-1.5cm}
|
alpar@2
|
1323 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1324 |
\begin{center}
|
alpar@2
|
1325 |
\begin{tikzpicture}
|
Madarasi@28
|
1326 |
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1327 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1328 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
1329 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
1330 |
format/1000 sep = \kern 0.08em}]
|
alpar@2
|
1331 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1332 |
\addplot table {randGraph/ind/vf2pInd35_0.3.txt};
|
alpar@3
|
1333 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1334 |
{randGraph/ind/vf2ppInd35_0.3.txt};
|
alpar@2
|
1335 |
\end{axis}
|
alpar@2
|
1336 |
\end{tikzpicture}
|
alpar@2
|
1337 |
\end{center}
|
alpar@2
|
1338 |
\end{subfigure}
|
alpar@2
|
1339 |
\begin{subfigure}[b]{0.55\textwidth}
|
alpar@2
|
1340 |
\begin{center}
|
alpar@2
|
1341 |
\begin{tikzpicture}
|
Madarasi@28
|
1342 |
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
|
alpar@3
|
1343 |
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
|
alpar@3
|
1344 |
west},scaled x ticks = false,x tick label style={/pgf/number
|
Madarasi@28
|
1345 |
format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
|
Madarasi@28
|
1346 |
format/1000 sep = \kern 0.08em}]
|
alpar@2
|
1347 |
%\addplot+[only marks] table {proteinsOrig.txt};
|
alpar@2
|
1348 |
\addplot table {randGraph/ind/vf2pInd35_0.8.txt};
|
alpar@3
|
1349 |
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
|
alpar@3
|
1350 |
{randGraph/ind/vf2ppInd35_0.8.txt};
|
alpar@2
|
1351 |
\end{axis}
|
alpar@2
|
1352 |
\end{tikzpicture}
|
Madarasi@23
|
1353 |
\end{center}
|
alpar@2
|
1354 |
\end{subfigure}
|
alpar@2
|
1355 |
\vspace*{-0.8cm}
|
Madarasi@28
|
1356 |
\caption{Induced subgraph isomorphism problem on random graphs having an average degree of
|
Madarasi@28
|
1357 |
35}\label{fig:randIND35}
|
alpar@2
|
1358 |
\end{figure}
|
alpar@2
|
1359 |
|
alpar@2
|
1360 |
|
alpar@29
|
1361 |
As the experiments above demonstrates, VF2++ is faster than VF2~Plus and able to
|
alpar@3
|
1362 |
handle really large graphs in milliseconds. Note that when $IND$ was
|
Madarasi@28
|
1363 |
considered and the graph to be embedded had proportionally few nodes ($\rho =
|
Madarasi@28
|
1364 |
0.05$, or $\rho = 0.1$), then VF2~Plus produced some inefficient node
|
alpar@4
|
1365 |
orders (e.g. see the $\delta=10$ case on
|
Madarasi@17
|
1366 |
Figure~\ref{fig:randIND10}). If these instances had been excluded, the
|
Madarasi@28
|
1367 |
charts would have looked similarly to the other ones.
|
alpar@3
|
1368 |
Unsurprisingly, as denser graphs are considered, both VF2++ and VF2
|
alpar@29
|
1369 |
Plus slow down slightly, but remain practically usable even on graphs
|
alpar@3
|
1370 |
having 10 000 nodes.
|
alpar@2
|
1371 |
|
alpar@2
|
1372 |
|
alpar@2
|
1373 |
|
alpar@2
|
1374 |
|
alpar@3
|
1375 |
|
alpar@2
|
1376 |
\section{Conclusion}
|
alpar@29
|
1377 |
This paper presented VF2++, a new graph matching algorithm based on VF2, and analyzed it from a practical point of view.
|
alpar@2
|
1378 |
|
alpar@3
|
1379 |
Recognizing the importance of the node order and determining an
|
alpar@3
|
1380 |
efficient one, VF2++ is able to match graphs of thousands of nodes in
|
alpar@3
|
1381 |
near practically linear time including preprocessing. In addition to
|
Madarasi@28
|
1382 |
the proper order, VF2++ uses more efficient cutting
|
alpar@29
|
1383 |
rules, which are easy to compute and make the algorithm able to prune
|
alpar@3
|
1384 |
most of the unfruitful branches without going astray.
|
alpar@2
|
1385 |
|
alpar@3
|
1386 |
In order to show the efficiency of the new method, it has been
|
Madarasi@28
|
1387 |
compared to VF2~Plus~\cite{VF2Plus}, which is the best contemporary algorithm.
|
alpar@2
|
1388 |
|
Madarasi@28
|
1389 |
The experiments show that VF2++ consistently outperforms VF2~Plus on
|
alpar@3
|
1390 |
biological graphs. It seems to be asymptotically faster on protein and
|
Madarasi@28
|
1391 |
on contact map graphs in the case of induced subgraph isomorphism problem,
|
Madarasi@28
|
1392 |
while in the case of graph isomorphism problem, it has definitely better
|
alpar@29
|
1393 |
asymptotic behavior on protein graphs.
|
alpar@2
|
1394 |
|
alpar@3
|
1395 |
Regarding random sparse graphs, not only has VF2++ proved itself to be
|
alpar@29
|
1396 |
faster than VF2~Plus, but it also has a practically linear behavior both
|
Madarasi@28
|
1397 |
in the case of induced subgraph and graph isomorphism problems.
|
alpar@2
|
1398 |
|
alpar@25
|
1399 |
%%%%%%%%%%%%%%%%
|
alpar@25
|
1400 |
\section*{Acknowledgement} \label{sec:ack}
|
alpar@25
|
1401 |
%%%%%%%%%%%%%%%%
|
alpar@25
|
1402 |
This research project was initiated and sponsored by QuantumBio
|
Madarasi@28
|
1403 |
Inc.~\cite{QUANTUMBIO}.
|
alpar@25
|
1404 |
|
alpar@25
|
1405 |
The authors were supported by the Hungarian Scientific Research Fund -
|
alpar@25
|
1406 |
OTKA, K109240 and by the J\'anos Bolyai Research Fellowship program of
|
alpar@25
|
1407 |
the Hungarian Academy of Sciences.
|
alpar@2
|
1408 |
|
alpar@0
|
1409 |
|
alpar@0
|
1410 |
%% The Appendices part is started with the command \appendix;
|
alpar@0
|
1411 |
%% appendix sections are then done as normal sections
|
alpar@0
|
1412 |
%% \appendix
|
alpar@0
|
1413 |
|
alpar@0
|
1414 |
%% \section{}
|
alpar@0
|
1415 |
%% \label{}
|
alpar@0
|
1416 |
|
alpar@0
|
1417 |
%% If you have bibdatabase file and want bibtex to generate the
|
alpar@0
|
1418 |
%% bibitems, please use
|
alpar@0
|
1419 |
%%
|
alpar@3
|
1420 |
\bibliographystyle{elsarticle-num} \bibliography{bibliography}
|
alpar@0
|
1421 |
|
alpar@0
|
1422 |
%% else use the following coding to input the bibitems directly in the
|
alpar@0
|
1423 |
%% TeX file.
|
alpar@0
|
1424 |
|
alpar@2
|
1425 |
%% \begin{thebibliography}{00}
|
alpar@0
|
1426 |
|
alpar@2
|
1427 |
%% %% \bibitem{label}
|
alpar@2
|
1428 |
%% %% Text of bibliographic item
|
alpar@0
|
1429 |
|
alpar@2
|
1430 |
%% \bibitem{}
|
alpar@0
|
1431 |
|
alpar@2
|
1432 |
%% \end{thebibliography}
|
alpar@2
|
1433 |
|
alpar@0
|
1434 |
\end{document}
|
alpar@0
|
1435 |
\endinput
|
alpar@0
|
1436 |
%%
|
alpar@0
|
1437 |
%% End of file `elsarticle-template-num.tex'.
|
alpar@29
|
1438 |
|
alpar@29
|
1439 |
%% LocalWords: Subgraph Isomorphism datasets subgraphs subgraph FPT
|
alpar@29
|
1440 |
%% LocalWords: isomorphism hypergraph Ullmann Nauty precompute BFS
|
alpar@29
|
1441 |
%% LocalWords: bijection isomorphisms undirected infeasible dataset
|
alpar@29
|
1442 |
%% LocalWords: preprocessing incrementing decrementing neighbours
|
alpar@29
|
1443 |
%% LocalWords: expectable bioinformatics
|