damecco.tex
author Madarasi Peter
Wed, 30 Nov 2016 16:52:36 +0100
changeset 22 1a4874982d84
parent 20 80d56dee41d9
child 23 b098561f70fe
permissions -rw-r--r--
outline, fine tune
alpar@0
     1
%% 
alpar@0
     2
%% Copyright 2007, 2008, 2009 Elsevier Ltd
alpar@0
     3
%% 
alpar@0
     4
%% This file is part of the 'Elsarticle Bundle'.
alpar@0
     5
%% ---------------------------------------------
alpar@0
     6
%% 
alpar@0
     7
%% It may be distributed under the conditions of the LaTeX Project Public
alpar@0
     8
%% License, either version 1.2 of this license or (at your option) any
alpar@0
     9
%% later version.  The latest version of this license is in
alpar@0
    10
%%    http://www.latex-project.org/lppl.txt
alpar@0
    11
%% and version 1.2 or later is part of all distributions of LaTeX
alpar@0
    12
%% version 1999/12/01 or later.
alpar@0
    13
%% 
alpar@0
    14
%% The list of all files belonging to the 'Elsarticle Bundle' is
alpar@0
    15
%% given in the file `manifest.txt'.
alpar@0
    16
%% 
alpar@0
    17
alpar@0
    18
%% Template article for Elsevier's document class `elsarticle'
alpar@0
    19
%% with numbered style bibliographic references
alpar@0
    20
%% SP 2008/03/01
alpar@0
    21
alpar@0
    22
\documentclass[preprint,12pt]{elsarticle}
alpar@0
    23
alpar@0
    24
%% Use the option review to obtain double line spacing
alpar@0
    25
%% \documentclass[authoryear,preprint,review,12pt]{elsarticle}
alpar@0
    26
alpar@0
    27
%% Use the options 1p,twocolumn; 3p; 3p,twocolumn; 5p; or 5p,twocolumn
alpar@0
    28
%% for a journal layout:
alpar@0
    29
%% \documentclass[final,1p,times]{elsarticle}
alpar@0
    30
%% \documentclass[final,1p,times,twocolumn]{elsarticle}
alpar@0
    31
%% \documentclass[final,3p,times]{elsarticle}
alpar@0
    32
%% \documentclass[final,3p,times,twocolumn]{elsarticle}
alpar@0
    33
%% \documentclass[final,5p,times]{elsarticle}
alpar@0
    34
%% \documentclass[final,5p,times,twocolumn]{elsarticle}
alpar@0
    35
alpar@0
    36
%% For including figures, graphicx.sty has been loaded in
alpar@0
    37
%% elsarticle.cls. If you prefer to use the old commands
alpar@0
    38
%% please give \usepackage{epsfig}
alpar@0
    39
alpar@0
    40
%% The amssymb package provides various useful mathematical symbols
alpar@0
    41
\usepackage{amssymb}
alpar@0
    42
%% The amsthm package provides extended theorem environments
alpar@0
    43
%% \usepackage{amsthm}
alpar@0
    44
alpar@0
    45
%% The lineno packages adds line numbers. Start line numbering with
alpar@0
    46
%% \begin{linenumbers}, end it with \end{linenumbers}. Or switch it on
alpar@0
    47
%% for the whole article with \linenumbers.
alpar@0
    48
%% \usepackage{lineno}
alpar@0
    49
alpar@2
    50
\usepackage{amsmath}
alpar@2
    51
%% \usepackage[pdftex]{graphicx}
alpar@2
    52
alpar@2
    53
\usepackage{pgfplots}
alpar@2
    54
\pgfplotsset{width=9cm}
alpar@2
    55
\pgfplotsset{compat=1.8}
alpar@2
    56
alpar@2
    57
\usepackage{caption}
alpar@2
    58
\usepackage{subcaption} 
alpar@2
    59
alpar@2
    60
\usepackage{algorithm}
alpar@2
    61
\usepackage{algpseudocode}
alpar@2
    62
\usepackage{tikz}
alpar@2
    63
alpar@2
    64
\usepackage{amsthm,amssymb}
alpar@2
    65
\renewcommand{\qedsymbol}{\rule{0.7em}{0.7em}}
alpar@2
    66
alpar@2
    67
\newtheorem{theorem}{Theorem}[subsection]
alpar@2
    68
\newtheorem{corollary}{Corollary}[theorem]
alpar@2
    69
\newtheorem{claim}[theorem]{Claim}
alpar@2
    70
alpar@2
    71
\newtheorem{definition}{Definition}[subsection]
alpar@2
    72
\newtheorem{notation}{Notation}[subsection]
alpar@2
    73
\newtheorem{example}{Example}[subsection]
alpar@2
    74
\usetikzlibrary{decorations.markings}
alpar@2
    75
\let\oldproofname=\proofname
alpar@2
    76
%% \renewcommand{\proofname}{\rm\bf{Proof:}}
alpar@2
    77
Madarasi@7
    78
\captionsetup{font=normalsize}
Madarasi@7
    79
alpar@1
    80
\journal{Discrete Applied Mathematics}
alpar@0
    81
alpar@0
    82
\begin{document}
alpar@0
    83
alpar@0
    84
\begin{frontmatter}
alpar@0
    85
alpar@0
    86
%% Title, authors and addresses
alpar@0
    87
alpar@0
    88
%% use the tnoteref command within \title for footnotes;
alpar@0
    89
%% use the tnotetext command for theassociated footnote;
alpar@0
    90
%% use the fnref command within \author or \address for footnotes;
alpar@0
    91
%% use the fntext command for theassociated footnote;
alpar@0
    92
%% use the corref command within \author for corresponding author footnotes;
alpar@0
    93
%% use the cortext command for theassociated footnote;
alpar@0
    94
%% use the ead command for the email address,
alpar@0
    95
%% and the form \ead[url] for the home page:
alpar@0
    96
%% \title{Title\tnoteref{label1}}
alpar@0
    97
%% \tnotetext[label1]{}
alpar@0
    98
%% \author{Name\corref{cor1}\fnref{label2}}
alpar@0
    99
%% \ead{email address}
alpar@0
   100
%% \ead[url]{home page}
alpar@0
   101
%% \fntext[label2]{}
alpar@0
   102
%% \cortext[cor1]{}
alpar@0
   103
%% \address{Address\fnref{label3}}
alpar@0
   104
%% \fntext[label3]{}
alpar@0
   105
alpar@1
   106
\title{Improved Algorithms for Matching Biological Graphs}
alpar@0
   107
alpar@0
   108
%% use optional labels to link authors explicitly to addresses:
alpar@0
   109
%% \author[label1,label2]{}
alpar@0
   110
%% \address[label1]{}
alpar@0
   111
%% \address[label2]{}
alpar@0
   112
alpar@1
   113
\author{Alp{\'a}r J{\"u}ttner and P{\'e}ter Madarasi}
alpar@0
   114
alpar@1
   115
\address{Dept of Operations Research, ELTE}
alpar@0
   116
alpar@0
   117
\begin{abstract}
alpar@1
   118
Subgraph isomorphism is a well-known NP-Complete problem, while its
alpar@1
   119
special case, the graph isomorphism problem is one of the few problems
alpar@1
   120
in NP neither known to be in P nor NP-Complete. Their appearance in
alpar@1
   121
many fields of application such as pattern analysis, computer vision
alpar@1
   122
questions and the analysis of chemical and biological systems has
alpar@1
   123
fostered the design of various algorithms for handling special graph
alpar@1
   124
structures.
alpar@0
   125
alpar@1
   126
This paper presents VF2++, a new algorithm based on the original VF2,
alpar@1
   127
which runs significantly faster on most test cases and performs
alpar@1
   128
especially well on special graph classes stemming from biological
alpar@1
   129
questions. VF2++ handles graphs of thousands of nodes in practically
alpar@1
   130
near linear time including preprocessing. Not only is it an improved
alpar@1
   131
version of VF2, but in fact, it is by far the fastest existing
Madarasi@19
   132
algorithm especially on biological graphs.
alpar@1
   133
alpar@1
   134
The reason for VF2++' superiority over VF2 is twofold. Firstly, taking
alpar@1
   135
into account the structure and the node labeling of the graph, VF2++
alpar@1
   136
determines a state order in which most of the unfruitful branches of
alpar@1
   137
the search space can be pruned immediately. Secondly, introducing more
alpar@1
   138
efficient - nevertheless still easier to compute - cutting rules
alpar@1
   139
reduces the chance of going astray even further.
alpar@1
   140
alpar@1
   141
In addition to the usual subgraph isomorphism, specialized versions
alpar@1
   142
for induced subgraph isomorphism and for graph isomorphism are
alpar@1
   143
presented. VF2++ has gained a runtime improvement of one order of
alpar@1
   144
magnitude respecting induced subgraph isomorphism and a better
alpar@1
   145
asymptotical behaviour in the case of graph isomorphism problem.
alpar@1
   146
alpar@1
   147
After having provided the description of VF2++, in order to evaluate
alpar@1
   148
its effectiveness, an extensive comparison to the contemporary other
alpar@1
   149
algorithms is shown, using a wide range of inputs, including both real
alpar@1
   150
life biological and chemical datasets and standard randomly generated
alpar@1
   151
graph series.
alpar@1
   152
alpar@1
   153
The work was motivated and sponsored by QuantumBio Inc., and all the
alpar@1
   154
developed algorithms are available as the part of the open source
alpar@1
   155
LEMON graph and network optimization library
alpar@1
   156
(http://lemon.cs.elte.hu).
alpar@0
   157
\end{abstract}
alpar@0
   158
alpar@0
   159
\begin{keyword}
alpar@0
   160
%% keywords here, in the form: keyword \sep keyword
alpar@0
   161
alpar@0
   162
%% PACS codes here, in the form: \PACS code \sep code
alpar@0
   163
alpar@0
   164
%% MSC codes here, in the form: \MSC code \sep code
alpar@0
   165
%% or \MSC[2008] code \sep code (2000 is the default)
alpar@0
   166
alpar@0
   167
\end{keyword}
alpar@0
   168
alpar@0
   169
\end{frontmatter}
alpar@0
   170
alpar@0
   171
%% \linenumbers
alpar@0
   172
alpar@0
   173
%% main text
alpar@2
   174
\section{Introduction}
alpar@2
   175
\label{sec:intro}
alpar@2
   176
alpar@3
   177
In the last decades, combinatorial structures, and especially graphs
alpar@3
   178
have been considered with ever increasing interest, and applied to the
alpar@3
   179
solution of several new and revised questions.  The expressiveness,
alpar@3
   180
the simplicity and the studiedness of graphs make them practical for
alpar@3
   181
modelling and appear constantly in several seemingly independent
Madarasi@19
   182
fields, such as bioinformatics and chemistry.
alpar@2
   183
alpar@3
   184
Complex biological systems arise from the interaction and cooperation
alpar@3
   185
of plenty of molecular components. Getting acquainted with such
Madarasi@19
   186
systems at the molecular level is of primary importance, since
alpar@3
   187
protein-protein interaction, DNA-protein interaction, metabolic
alpar@3
   188
interaction, transcription factor binding, neuronal networks, and
Madarasi@19
   189
hormone signaling networks can be understood this way.
alpar@2
   190
Madarasi@19
   191
Many chemical and biological structures can easily be modeled
Madarasi@19
   192
as graphs, for instance, a molecular structure can be
Madarasi@19
   193
considered as a graph, whose nodes correspond to atoms and whose
Madarasi@19
   194
edges to chemical bonds. The similarity and dissimilarity of
Madarasi@19
   195
objects corresponding to nodes are incorporated to the model
Madarasi@19
   196
by \emph{node labels}. Understanding such networks basically
Madarasi@19
   197
requires finding specific subgraphs, thus calls for efficient
Madarasi@19
   198
graph matching algorithms.
alpar@2
   199
Madarasi@19
   200
Other real-world fields related to some
Madarasi@19
   201
variants of graph matching include pattern recognition
alpar@3
   202
and machine vision \cite{HorstBunkeApplications}, symbol recognition
alpar@3
   203
\cite{CordellaVentoSymbolRecognition}, face identification
alpar@3
   204
\cite{JianzhuangYongFaceIdentification}.  \\
alpar@2
   205
alpar@3
   206
Subgraph and induced subgraph matching problems are known to be
alpar@3
   207
NP-Complete\cite{SubgraphNPC}, while the graph isomorphism problem is
alpar@3
   208
one of the few problems in NP neither known to be in P nor
alpar@3
   209
NP-Complete. Although polynomial time isomorphism algorithms are known
alpar@3
   210
for various graph classes, like trees and planar
alpar@3
   211
graphs\cite{PlanarGraphIso}, bounded valence
alpar@3
   212
graphs\cite{BondedDegGraphIso}, interval graphs\cite{IntervalGraphIso}
Madarasi@19
   213
or permutation graphs\cite{PermGraphIso}, and recently, an FPT algorithm has been presented for the coloured hypergraph isomorphism problem in \cite{ColoredHiperGraphIso}.
alpar@2
   214
alpar@3
   215
In the following, some algorithms based on other approaches are
Madarasi@19
   216
summarized, which do not need any restrictions on the graphs. Even though,
alpar@3
   217
an overall polynomial behaviour is not expectable from such an
Madarasi@19
   218
alternative, it may often have good practical performance, in fact,
Madarasi@19
   219
it might be the best choice even on a graph class for which polynomial
Madarasi@19
   220
algorithm is known.
alpar@2
   221
alpar@3
   222
The first practically usable approach was due to
Madarasi@19
   223
\emph{Ullmann}\cite{Ullmann} which is a commonly used depth-first
alpar@3
   224
search based algorithm with a complex heuristic for reducing the
alpar@3
   225
number of visited states. A major problem is its $\Theta(n^3)$ space
alpar@3
   226
complexity, which makes it impractical in the case of big sparse
alpar@3
   227
graphs.
alpar@2
   228
alpar@4
   229
In a recent paper, Ullmann\cite{UllmannBit} presents an
alpar@3
   230
improved version of this algorithm based on a bit-vector solution for
alpar@3
   231
the binary Constraint Satisfaction Problem.
alpar@2
   232
Madarasi@19
   233
The \emph{Nauty} algorithm\cite{Nauty} transforms the two graphs to
alpar@3
   234
a canonical form before starting to check for the isomorphism. It has
alpar@3
   235
been considered as one of the fastest graph isomorphism algorithms,
alpar@3
   236
although graph categories were shown in which it takes exponentially
alpar@3
   237
many steps. This algorithm handles only the graph isomorphism problem.
alpar@2
   238
alpar@4
   239
The \emph{LAD} algorithm\cite{Lad} uses a depth-first search
alpar@3
   240
strategy and formulates the matching as a Constraint Satisfaction
alpar@3
   241
Problem to prune the search tree. The constraints are that the mapping
alpar@3
   242
has to be injective and edge-preserving, hence it is possible to
alpar@3
   243
handle new matching types as well.
alpar@2
   244
Madarasi@19
   245
The \emph{RI} algorithm\cite{RI} and its variations are based on a
alpar@3
   246
state space representation. After reordering the nodes of the graphs,
alpar@3
   247
it uses some fast executable heuristic checks without using any
alpar@3
   248
complex pruning rules. It seems to run really efficiently on graphs
alpar@3
   249
coming from biology, and won the International Contest on Pattern
alpar@3
   250
Search in Biological Databases\cite{Content}.
alpar@2
   251
alpar@3
   252
The currently most commonly used algorithm is the
Madarasi@19
   253
\emph{VF2}\cite{VF2}, the improved version of \emph{VF}\cite{VF}, which was
alpar@3
   254
designed for solving pattern matching and computer vision problems,
alpar@3
   255
and has been one of the best overall algorithms for more than a
alpar@3
   256
decade. Although, it can't be up to new specialized algorithms, it is
alpar@3
   257
still widely used due to its simplicity and space efficiency. VF2 uses
alpar@3
   258
a state space representation and checks some conditions in each state
alpar@3
   259
to prune the search tree.
alpar@2
   260
Madarasi@19
   261
Meanwhile, another variant called \emph{VF2 Plus}\cite{VF2Plus} has
alpar@3
   262
been published. It is considered to be as efficient as the RI
alpar@3
   263
algorithm and has a strictly better behavior on large graphs.  The
alpar@3
   264
main idea of VF2 Plus is to precompute a heuristic node order of the
alpar@3
   265
small graph, in which the VF2 works more efficiently.
alpar@2
   266
Madarasi@19
   267
This paper introduces \emph{VF2++}, a new further improved algorithm
Madarasi@19
   268
for the graph and (induced)subgraph isomorphism problem, which uses
Madarasi@19
   269
efficient cutting rules and determines a node order in which VF2 runs
Madarasi@19
   270
significantly faster on practical inputs.
Madarasi@19
   271
Madarasi@19
   272
This project was initiated and sponsored by QuantumBio
Madarasi@19
   273
Inc.\cite{QUANTUMBIO} and the implementation --- along with a source
Madarasi@19
   274
code --- has been published as a part of LEMON\cite{LEMON} open source
Madarasi@19
   275
graph library.
Madarasi@19
   276
Madarasi@22
   277
Outline: Section~\ref{sec:ProbStat} defines the problems to be solved, Section~\ref{sec:VF2Alg} provides a description of VF2, Section~\ref{sec:VF2ppAlg} introduces VF2++, a new graph matching algorithm, Section~\ref{sec:VF2ppImpl} presents the details of an efficient implementation of VF2++, and Section~\ref{sec:ExpRes} compares VF2++ to a state of the art algorithm.
Madarasi@22
   278
Madarasi@22
   279
\section{Problem Statement}\label{sec:ProbStat}
Madarasi@19
   280
This section provides a formal description of the problems to be
alpar@3
   281
solved.
alpar@2
   282
\subsection{Definitions}
alpar@2
   283
Madarasi@19
   284
Throughout the paper $G_{1}=(V_{1}, E_{1})$ and
Madarasi@19
   285
$G_{2}=(V_{2}, E_{2})$ denote two undirected graphs.
Madarasi@19
   286
Madarasi@19
   287
\begin{definition}
Madarasi@19
   288
$\mathcal{L}: (V_{1}\cup V_{2}) \longrightarrow K$ is a \textbf{node
Madarasi@19
   289
    label function}, where K is an arbitrary set. The elements in K
Madarasi@19
   290
  are the \textbf{node labels}. Two nodes, u and v are said to be
Madarasi@19
   291
  \textbf{equivalent} if $\mathcal{L}(u)=\mathcal{L}(v)$.
Madarasi@19
   292
\end{definition}
Madarasi@19
   293
Madarasi@19
   294
For the sake of simplicity, in this paper the graph, subgraph and induced subgraph isomorphisms are defined in a more general way.
Madarasi@19
   295
alpar@2
   296
\begin{definition}\label{sec:ismorphic}
Madarasi@19
   297
$G_{1}$ and $G_{2}$ are \textbf{isomorphic} (by the node label $\mathcal{L}$) if $\exists \mathfrak{m}:
Madarasi@19
   298
  V_{1} \longrightarrow V_{2}$ bijection, for which the
alpar@3
   299
  following is true:
alpar@2
   300
\begin{center}
Madarasi@20
   301
$\forall u\in{V_{1}} : \mathcal{L}(u)=\mathcal{L}(\mathfrak{m}(u))$ and\\
Madarasi@19
   302
$\forall u,v\in{V_{1}} : (u,v)\in{E_{1}} \Leftrightarrow (\mathfrak{m}(u),\mathfrak{m}(v))\in{E_{2}}$
alpar@2
   303
\end{center}
alpar@2
   304
\end{definition}
Madarasi@19
   305
alpar@2
   306
\begin{definition}
Madarasi@19
   307
$G_{1}$ is a \textbf{subgraph} of $G_{2}$ (by the node label $\mathcal{L}$) if $\exists \mathfrak{m}:
Madarasi@19
   308
  V_{1}\longrightarrow V_{2}$ injection, for which the
alpar@3
   309
  following is true:
alpar@2
   310
\begin{center}
Madarasi@20
   311
$\forall u\in{V_{1}} : \mathcal{L}(u)=\mathcal{L}(\mathfrak{m}(u))$ and\\
Madarasi@19
   312
$\forall u,v \in{V_{1}} : (u,v)\in{E_{1}} \Rightarrow (\mathfrak{m}(u),\mathfrak{m}(v))\in E_{2}$
alpar@2
   313
\end{center}
alpar@2
   314
\end{definition}
alpar@2
   315
alpar@2
   316
\begin{definition} 
Madarasi@19
   317
$G_{1}$ is an \textbf{induced subgraph} of $G_{2}$ (by the node label $\mathcal{L}$) if $\exists
Madarasi@19
   318
  \mathfrak{m}: V_{1}\longrightarrow V_{2}$ injection, for which the
alpar@3
   319
  following is true:
alpar@2
   320
\begin{center}
Madarasi@19
   321
$\forall u\in{V_{1}} : \mathcal{L}(u)=\mathcal{L}(\mathfrak{m}(u))$ and
Madarasi@19
   322
Madarasi@19
   323
$\forall u,v \in{V_{1}} : (u,v)\in{E_{1}} \Leftrightarrow
Madarasi@19
   324
  (\mathfrak{m}(u),\mathfrak{m}(v))\in E_{2}$
alpar@2
   325
\end{center}
alpar@2
   326
\end{definition}
alpar@2
   327
alpar@2
   328
alpar@2
   329
\subsection{Common problems}\label{sec:CommProb}
alpar@2
   330
alpar@3
   331
The focus of this paper is on two extensively studied topics, the
alpar@3
   332
subgraph isomorphism and its variations. However, the following
alpar@3
   333
problems also appear in many applications.
alpar@2
   334
alpar@3
   335
The \textbf{subgraph matching problem} is the following: is
Madarasi@19
   336
$G_{1}$ isomorphic to any subgraph of $G_{2}$ by a given node
alpar@3
   337
label?
alpar@2
   338
alpar@3
   339
The \textbf{induced subgraph matching problem} asks the same about the
alpar@3
   340
existence of an induced subgraph.
alpar@2
   341
alpar@3
   342
The \textbf{graph isomorphism problem} can be defined as induced
alpar@3
   343
subgraph matching problem where the sizes of the two graphs are equal.
alpar@2
   344
Madarasi@22
   345
In addition, one may want to find a \textbf{single} mapping or \textbf{enumerate} all of them.
alpar@2
   346
Madarasi@22
   347
Note that some authors refer to the term
alpar@3
   348
\emph{subgraph isomorphism problem} as an \emph{induced subgraph
alpar@3
   349
  isomorphism problem}.
alpar@2
   350
Madarasi@22
   351
\section{The VF2 Algorithm}\label{sec:VF2Alg}
alpar@3
   352
This algorithm is the basis of both the VF2++ and the VF2 Plus.  VF2
alpar@4
   353
is able to handle all the variations mentioned in Section
alpar@4
   354
  \ref{sec:CommProb}.  Although it can also handle directed graphs,
alpar@3
   355
for the sake of simplicity, only the undirected case will be
alpar@3
   356
discussed.
alpar@2
   357
alpar@2
   358
alpar@2
   359
\subsection{Common notations}
Madarasi@19
   360
\indent Assume $G_{1}$ is searched in $G_{2}$.  The following
alpar@3
   361
definitions and notations will be used throughout the whole paper.
alpar@2
   362
\begin{definition}
Madarasi@19
   363
An injection $\mathfrak{m} : D \longrightarrow V_2$ is called (partial) \textbf{mapping}, where $D\subseteq V_1$.
Madarasi@19
   364
\end{definition}
Madarasi@19
   365
Madarasi@19
   366
\begin{notation}
Madarasi@19
   367
$\mathfrak{D}(f)$ and $\mathfrak{R}(f)$ denote the domain and the range of a function $f$, respectively.
Madarasi@19
   368
\end{notation}
Madarasi@19
   369
Madarasi@19
   370
\begin{definition}
Madarasi@19
   371
Mapping $\mathfrak{m}$ \textbf{covers} a node $u\in V_1\cup V_2$ if $u\in \mathfrak{D}(\mathfrak{m})\cup \mathfrak{R}(\mathfrak{m})$.
alpar@2
   372
\end{definition}
alpar@2
   373
alpar@2
   374
\begin{definition}
Madarasi@19
   375
A mapping $\mathfrak{m}$ is $\mathbf{whole\ mapping}$ if $\mathfrak{m}$ covers all the
Madarasi@19
   376
nodes of $V_{1}$, i.e. $\mathfrak{D}(\mathfrak{m})=V_1$.
alpar@2
   377
\end{definition}
alpar@2
   378
alpar@2
   379
\begin{definition}
Madarasi@20
   380
Let \textbf{extend}$(\mathfrak{m},(u,v))$ denote the function $f : \mathfrak{D}(\mathfrak{m})\cup\{u\}\longrightarrow\mathfrak{R}(\mathfrak{m})\cup\{v\}$, for which $\forall w\in \mathfrak{D}(\mathfrak{m}) : \mathfrak{m}(w)=f(w)$ and $f(u)=v$ holds. Where $u\in V_1\setminus\mathfrak{D}(\mathfrak{m})$ and $v\in V_2\setminus\mathfrak{R}(\mathfrak{m})$, otherwise $extend(\mathfrak{m},(u,v))$ is undefined.
alpar@2
   381
\end{definition}
alpar@2
   382
alpar@2
   383
\begin{notation}
Madarasi@19
   384
Throughout the paper, $\mathbf{PT}$ denotes a generic problem type
Madarasi@19
   385
which can be substituted by any of the $\mathbf{ISO}$, $\mathbf{SUB}$
Madarasi@19
   386
and $\mathbf{IND}$ problems.
alpar@2
   387
\end{notation}
alpar@2
   388
alpar@2
   389
\begin{definition}
Madarasi@19
   390
Let $\mathfrak{m}$ be a mapping. A logical function $\mathbf{Cons_{PT}}$ is a
Madarasi@17
   391
\textbf{consistency function by } $\mathbf{PT}$ if the following
Madarasi@19
   392
holds. If there exists a whole mapping $w$ satisfying the requirements of $PT$, for which $\mathfrak{m}$ is exactly $w$ restricted to $\mathfrak{D}(\mathfrak{m})$.
alpar@2
   393
\end{definition}
alpar@2
   394
alpar@2
   395
\begin{definition} 
Madarasi@19
   396
Let $\mathfrak{m}$ be a mapping. A logical function $\mathbf{Cut_{PT}}$ is a
Madarasi@17
   397
\textbf{cutting function by } $\mathbf{PT}$ if the following
Madarasi@19
   398
holds. $\mathbf{Cut_{PT}(\mathfrak{m})}$ is false if there exists a sequence of extend operations, which results in a whole mapping satisfying the requirements of $PT$.
alpar@2
   399
\end{definition}
alpar@2
   400
alpar@2
   401
\begin{definition}
Madarasi@19
   402
$\mathfrak{m}$ is said to be \textbf{consistent mapping by} $\mathbf{PT}$ if
Madarasi@19
   403
  $Cons_{PT}(\mathfrak{m})$ is true.
alpar@2
   404
\end{definition}
alpar@2
   405
alpar@2
   406
$Cons_{PT}$ and $Cut_{PT}$ will often be used in the following form.
alpar@2
   407
\begin{notation}
Madarasi@19
   408
Let $\mathbf{Cons_{PT}(p, \mathfrak{m})}:=Cons_{PT}(extend(\mathfrak{m},p))$, and
Madarasi@19
   409
$\mathbf{Cut_{PT}(p, \mathfrak{m})}:=Cut_{PT}(extend(\mathfrak{m},p))$, where
Madarasi@19
   410
$p\in{V_{1}\backslash\mathfrak{D}(\mathfrak{m}) \!\times\!V_{2}\backslash\mathfrak{R}(\mathfrak{m})}$.
alpar@2
   411
\end{notation}
alpar@2
   412
alpar@3
   413
$Cons_{PT}$ will be used to check the consistency of the already
alpar@3
   414
covered nodes, while $Cut_{PT}$ is for looking ahead to recognize if
alpar@3
   415
no whole consistent mapping can contain the current mapping.
alpar@2
   416
alpar@2
   417
\subsection{Overview of the algorithm}
alpar@3
   418
VF2 uses a state space representation of mappings, $Cons_{PT}$ for
alpar@3
   419
excluding inconsistency with the problem type and $Cut_{PT}$ for
Madarasi@19
   420
pruning the search tree.
alpar@2
   421
alpar@4
   422
Algorithm~\ref{alg:VF2Pseu} is a high level description of
Madarasi@19
   423
the VF2 matching algorithm. Each state of the matching process can
Madarasi@19
   424
be associated with a mapping $\mathfrak{m}$. The initial state
Madarasi@19
   425
is associated with a mapping $\mathfrak{m}$, for which
Madarasi@19
   426
$\mathfrak{D}(\mathfrak{m})=\emptyset$, i.e. it starts with an empty mapping.
alpar@2
   427
alpar@2
   428
alpar@2
   429
\begin{algorithm}
Madarasi@13
   430
\algtext*{EndIf}%ne nyomtasson end if-et
Madarasi@13
   431
\algtext*{EndFor}%ne
Madarasi@13
   432
\algtext*{EndProcedure}%ne nyomtasson ..
alpar@2
   433
\caption{\hspace{0.5cm}$A\ high\ level\ description\ of\ VF2$}\label{alg:VF2Pseu}
alpar@2
   434
\begin{algorithmic}[1]
alpar@2
   435
Madarasi@19
   436
\Procedure{VF2}{Mapping $\mathfrak{m}$, ProblemType $PT$}
Madarasi@19
   437
  \If{$\mathfrak{m}$ covers
Madarasi@19
   438
    $V_{1}$} \State Output($\mathfrak{m}$)
Madarasi@19
   439
  \Else
Madarasi@19
   440
  \State Compute the set $P_\mathfrak{m}$ of the pairs candidate for inclusion
Madarasi@19
   441
  in $\mathfrak{m}$ \ForAll{$p\in{P_\mathfrak{m}}$} \If{Cons$_{PT}$($p,\mathfrak{m}$) $\wedge$
Madarasi@19
   442
    $\neg$Cut$_{PT}$($p,\mathfrak{m}$)}
Madarasi@19
   443
    \State \textbf{call}
Madarasi@19
   444
  VF2($extend(\mathfrak{m},p)$, $PT$) \EndIf \EndFor \EndIf \EndProcedure
alpar@2
   445
\end{algorithmic}
alpar@2
   446
\end{algorithm}
alpar@2
   447
alpar@2
   448
Madarasi@19
   449
For the current mapping $\mathfrak{m}$, the algorithm computes $P_\mathfrak{m}$, the set of
Madarasi@19
   450
candidate node pairs for adding to the current mapping $\mathfrak{m}_s$.
alpar@2
   451
Madarasi@19
   452
For each pair $p$ in $P_\mathfrak{m}$, $Cons_{PT}(p,\mathfrak{m})$ and
Madarasi@19
   453
$Cut_{PT}(p,\mathfrak{m})$ are evaluated. If the former is true and
Madarasi@19
   454
the latter is false, the whole process is recursively applied to
Madarasi@19
   455
$extend(\mathfrak{m},p)$. Otherwise, $extend(\mathfrak{m},p)$ is not consistent by $PT$, or it
Madarasi@19
   456
can be proved that $\mathfrak{m}$ can not be extended to a whole mapping.
alpar@2
   457
Madarasi@11
   458
In order to make sure of the correctness, see
alpar@2
   459
\begin{claim}
alpar@3
   460
Through consistent mappings, only consistent whole mappings can be
Madarasi@19
   461
reached, and all the consistent whole mappings are reachable through
alpar@3
   462
consistent mappings.
alpar@2
   463
\end{claim}
alpar@2
   464
Madarasi@19
   465
Note that a mapping may be reached in exponentially many different ways, since the
Madarasi@19
   466
order of extensions does not influence the nascent mapping.
alpar@2
   467
alpar@2
   468
However, one may observe
alpar@2
   469
alpar@2
   470
\begin{claim}
alpar@2
   471
\label{claim:claimTotOrd}
Madarasi@19
   472
Let $\prec$ be an arbitrary total ordering relation on $V_{1}$.  If
Madarasi@19
   473
the algorithm ignores each $p=(u,v) \in P_\mathfrak{m}$, for which
alpar@2
   474
\begin{center}
Madarasi@19
   475
$\exists (\tilde{u},\tilde{v})\in P_\mathfrak{m}: \tilde{u} \prec u$,
alpar@2
   476
\end{center}
Madarasi@19
   477
then no mapping can be reached more than once, and each whole mapping remains reachable.
alpar@2
   478
\end{claim}
alpar@2
   479
alpar@3
   480
Note that the cornerstone of the improvements to VF2 is a proper
alpar@3
   481
choice of a total ordering.
alpar@2
   482
Madarasi@19
   483
\subsection{The candidate set}
alpar@2
   484
\label{candidateComputingVF2}
Madarasi@19
   485
Let $P_\mathfrak{m}$ be the set of the candidate pairs for inclusion in $\mathfrak{m}$.
alpar@2
   486
alpar@2
   487
\begin{notation}
Madarasi@19
   488
Let $\mathbf{T_{1}(\mathfrak{m})}:=\{u \in V_{1}\backslash\mathfrak{D}(\mathfrak{m}) : \exists \tilde{u}\in{\mathfrak{D}(\mathfrak{m}): (u,\tilde{u})\in E_{1}}\}$, and
Madarasi@19
   489
 $\mathbf{T_{2}(\mathfrak{m})} := \{v \in V_{2}\backslash\mathfrak{R}(\mathfrak{m}) : \exists\tilde{v}\in{\mathfrak{R}(\mathfrak{m}):(v,\tilde{v})\in E_{2}}\}$.
alpar@2
   490
\end{notation}
alpar@2
   491
Madarasi@19
   492
The set $P_\mathfrak{m}$ includes the pairs of uncovered neighbours of covered
Madarasi@17
   493
nodes, and if there is not such a node pair, all the pairs containing
alpar@3
   494
two uncovered nodes are added. Formally, let
alpar@2
   495
\[
Madarasi@19
   496
 P_\mathfrak{m}\!=\!
alpar@2
   497
  \begin{cases} 
Madarasi@19
   498
   T_{1}(\mathfrak{m})\times T_{2}(\mathfrak{m})&\hspace{-0.15cm}\text{if }
Madarasi@19
   499
   T_{1}(\mathfrak{m})\!\neq\!\emptyset\ \text{and }T_{2}(\mathfrak{m})\!\neq
Madarasi@19
   500
   \emptyset,\\ (V_{1}\!\setminus\!\mathfrak{D}(\mathfrak{m}))\!\times\!(V_{2}\!\setminus\!\mathfrak{R}(\mathfrak{m}))
Madarasi@19
   501
   &\hspace{-0.15cm}\text{otherwise}.
alpar@2
   502
  \end{cases}
alpar@2
   503
\]
alpar@2
   504
alpar@2
   505
\subsection{Consistency}
Madarasi@19
   506
Suppose $p=(u,v)$, where $u\in V_{1}$ and $v\in V_{2}$, $\mathfrak{m}$ is a consistent mapping by
Madarasi@19
   507
$PT$. $Cons_{PT}(p,\mathfrak{m})$ checks whether
Madarasi@19
   508
including pair $p$ into $\mathfrak{m}$ leads to a consistent mapping by $PT$.
Madarasi@15
   509
Madarasi@15
   510
For example, the consistency function of induced subgraph isomorphism is as follows.
alpar@2
   511
\begin{notation}
Madarasi@19
   512
Let $\mathbf{\Gamma_{1} (u)}:=\{\tilde{u}\in V_{1} :
Madarasi@19
   513
(u,\tilde{u})\in E_{1}\}$, and $\mathbf{\Gamma_{2}
Madarasi@19
   514
  (v)}:=\{\tilde{v}\in V_{2} : (v,\tilde{v})\in E_{2}\}$, where $u\in V_{1}$ and $v\in V_{2}$.
alpar@2
   515
\end{notation}
alpar@2
   516
Madarasi@19
   517
$extend(\mathfrak{m},(u,v))$ is a consistent mapping by $IND$ $\Leftrightarrow
Madarasi@19
   518
(\forall \tilde{u}\in \mathfrak{D}(\mathfrak{m}): (u,\tilde{u})\in E_{1}
Madarasi@19
   519
\Leftrightarrow (v,\mathfrak{m}(\tilde{u}))\in E_{2})$. The
alpar@3
   520
following formulation gives an efficient way of calculating
alpar@3
   521
$Cons_{IND}$.
alpar@2
   522
\begin{claim}
Madarasi@19
   523
$Cons_{IND}((u,v),\mathfrak{m}):=\mathcal{L}(u)\!\!=\!\!\mathcal{L}(v)\wedge(\forall \tilde{v}\in \Gamma_{2}(v)\cap\mathfrak{R}(\mathfrak{m}):(u,\mathfrak{m}^{-1}(\tilde{v}))\in E_{1})\wedge
Madarasi@19
   524
  (\forall \tilde{u}\in \Gamma_{1}(u)
Madarasi@19
   525
  \cap \mathfrak{D}(\mathfrak{m}):(v,\mathfrak{m}(\tilde{u}))\in E_{2})$ is a
alpar@3
   526
  consistency function in the case of $IND$.
alpar@2
   527
\end{claim}
alpar@2
   528
alpar@2
   529
\subsection{Cutting rules}
Madarasi@19
   530
$Cut_{PT}(p,\mathfrak{m})$ is defined by a collection of efficiently
Madarasi@19
   531
verifiable conditions. The requirement is that $Cut_{PT}(p,\mathfrak{m})$ can
Madarasi@19
   532
be true only if it is impossible to extend $extend(\mathfrak{m},p)$ to a
alpar@3
   533
whole mapping.
Madarasi@15
   534
Madarasi@15
   535
As an example, the cutting function of induced subgraph isomorphism is presented.
alpar@2
   536
\begin{notation}
Madarasi@19
   537
Let $\mathbf{\tilde{T}_{1}}(\mathfrak{m}):=(V_{1}\backslash
Madarasi@19
   538
\mathfrak{D}(\mathfrak{m}))\backslash T_{1}(\mathfrak{m})$, and
Madarasi@19
   539
\\ $\mathbf{\tilde{T}_{2}}(\mathfrak{m}):=(V_{2}\backslash
Madarasi@19
   540
\mathfrak{R}(\mathfrak{m}))\backslash T_{2}(\mathfrak{m})$.
alpar@2
   541
\end{notation}
Madarasi@15
   542
alpar@2
   543
\begin{claim}
Madarasi@19
   544
$Cut_{IND}((u,v),\mathfrak{m}):= |\Gamma_{2} (v)\ \cap\ T_{2}(\mathfrak{m})| <
Madarasi@19
   545
  |\Gamma_{1} (u)\ \cap\ T_{1}(\mathfrak{m})| \vee |\Gamma_{2}(v)\cap
Madarasi@19
   546
  \tilde{T}_{2}(\mathfrak{m})| < |\Gamma_{1}(u)\cap
Madarasi@19
   547
  \tilde{T}_{1}(\mathfrak{m})|$ is a cutting function by $IND$.
alpar@2
   548
\end{claim}
alpar@2
   549
Madarasi@22
   550
\section{The VF2++ Algorithm}\label{sec:VF2ppAlg}
alpar@3
   551
Although any total ordering relation makes the search space of VF2 a
alpar@3
   552
tree, its choice turns out to dramatically influence the number of
alpar@3
   553
visited states. The goal is to determine an efficient one as quickly
alpar@3
   554
as possible.
alpar@2
   555
alpar@3
   556
The main reason for VF2++' superiority over VF2 is twofold. Firstly,
alpar@3
   557
taking into account the structure and the node labeling of the graph,
alpar@3
   558
VF2++ determines a state order in which most of the unfruitful
alpar@3
   559
branches of the search space can be pruned immediately. Secondly,
alpar@3
   560
introducing more efficient --- nevertheless still easier to compute
alpar@3
   561
--- cutting rules reduces the chance of going astray even further.
alpar@2
   562
alpar@3
   563
In addition to the usual subgraph isomorphism, specialized versions
alpar@3
   564
for induced subgraph isomorphism and for graph isomorphism have been
Madarasi@22
   565
designed.
alpar@2
   566
Madarasi@22
   567
Note that a weaker version of the cutting rules and an efficient
Madarasi@22
   568
candidate set calculating were described in \cite{VF2Plus}.
alpar@2
   569
alpar@3
   570
It should be noted that all the methods described in this section are
Madarasi@22
   571
extendable to handle directed graphs and edge labels as well.
alpar@3
   572
The basic ideas and the detailed description of VF2++ are provided in
Madarasi@22
   573
the following.\newline
alpar@2
   574
Madarasi@19
   575
The goal is to find a matching order in which the algorithm is able to
Madarasi@19
   576
recognize inconsistency or prune the infeasible branches on the
Madarasi@19
   577
highest levels and goes deep only if it is needed.
Madarasi@19
   578
Madarasi@19
   579
\begin{notation}
Madarasi@19
   580
Let $\mathbf{Conn_{H}(u)}:=|\Gamma_{1}(u)\cap H\}|$, that is the
Madarasi@19
   581
number of neighbours of u which are in H, where $u\in V_{1} $ and
Madarasi@19
   582
$H\subseteq V_{1}$.
Madarasi@19
   583
\end{notation}
Madarasi@19
   584
Madarasi@19
   585
The principal question is the following. Suppose a mapping $\mathfrak{m}$ is
Madarasi@19
   586
given. For which node of $T_{1}(\mathfrak{m})$ is the hardest to find a
Madarasi@19
   587
consistent pair in $G_{2}$? The more covered neighbours a node in
Madarasi@19
   588
$T_{1}(\mathfrak{m})$ has --- i.e. the largest $Conn_{\mathfrak{D}(\mathfrak{m})}$ it has
Madarasi@19
   589
---, the more rarely satisfiable consistency constraints for its pair
Madarasi@19
   590
are given.
Madarasi@19
   591
Madarasi@19
   592
In biology, most of the graphs are sparse, thus several nodes in
Madarasi@19
   593
$T_{1}(\mathfrak{m})$ may have the same $Conn_{\mathfrak{D}(\mathfrak{m})}$, which makes
Madarasi@19
   594
reasonable to define a secondary and a tertiary order between them.
Madarasi@19
   595
The observation above proves itself to be as determining, that the
Madarasi@19
   596
secondary ordering prefers nodes with the most uncovered neighbours
Madarasi@19
   597
among which have the same $Conn_{\mathfrak{D}(\mathfrak{m})}$ to increase
Madarasi@19
   598
$Conn_{\mathfrak{D}(\mathfrak{m})}$ of uncovered nodes so much, as possible.  The
Madarasi@19
   599
tertiary ordering prefers nodes having the rarest uncovered labels.
Madarasi@19
   600
Madarasi@19
   601
Note that the secondary ordering is the same as the ordering by $deg$,
Madarasi@19
   602
which is a static data in front of the above used.
Madarasi@19
   603
Madarasi@19
   604
These rules can easily result in a matching order which contains the
Madarasi@19
   605
nodes of a long path successively, whose nodes may have low $Conn$ and
Madarasi@19
   606
is easily matchable into $G_{2}$. To avoid that, a BFS order is
Madarasi@19
   607
used, which provides the shortest possible paths.
Madarasi@19
   608
\newline
Madarasi@19
   609
Madarasi@19
   610
In the following, some examples on which the VF2 may be slow are
Madarasi@19
   611
described, although they are easily solvable by using a proper
Madarasi@19
   612
matching order.
Madarasi@19
   613
Madarasi@19
   614
\begin{example}
Madarasi@19
   615
Suppose $G_{1}$ can be mapped into $G_{2}$ in many ways
Madarasi@19
   616
without node labels. Let $u\in V_{1}$ and $v\in V_{2}$.
Madarasi@19
   617
\newline
Madarasi@19
   618
$\mathcal{L}(u):=black$
Madarasi@19
   619
\newline
Madarasi@19
   620
$\mathcal{L}(v):=black$
Madarasi@19
   621
\newline
Madarasi@22
   622
$\mathcal{L}(\tilde{u}):=red \ \forall \tilde{u}\in V_{1}\backslash
Madarasi@22
   623
\{u\}$
Madarasi@19
   624
\newline
Madarasi@22
   625
$\mathcal{L}(\tilde{v}):=red \ \forall \tilde{v}\in V_{2}\backslash
Madarasi@22
   626
\{v\}$
Madarasi@19
   627
\newline
Madarasi@19
   628
Madarasi@19
   629
Now, any mapping by $\mathcal{L}$ must contain $(u,v)$, since
Madarasi@19
   630
$u$ is black and no node in $V_{2}$ has a black label except
Madarasi@19
   631
$v$. If unfortunately $u$ were the last node which will get covered,
Madarasi@19
   632
VF2 would check only in the last steps, whether $u$ can be matched to
Madarasi@19
   633
$v$.
Madarasi@19
   634
\newline
Madarasi@19
   635
However, had $u$ been the first matched node, u would have been
Madarasi@19
   636
matched immediately to v, so all the mappings would have been
Madarasi@19
   637
precluded in which node labels can not correspond.
Madarasi@19
   638
\end{example}
Madarasi@19
   639
Madarasi@19
   640
\begin{example}
Madarasi@19
   641
Suppose there is no node label given, $G_{1}$ is a small graph and
Madarasi@19
   642
can not be mapped into $G_{2}$ and $u\in V_{1}$.
Madarasi@19
   643
\newline
Madarasi@19
   644
Let $G'_{1}:=(V_{1}\cup
Madarasi@19
   645
\{u'_{1},u'_{2},..,u'_{k}\},E_{1}\cup
Madarasi@19
   646
\{(u,u'_{1}),(u'_{1},u'_{2}),..,(u'_{k-1},u'_{k})\})$, that is,
Madarasi@19
   647
$G'_{1}$ is $G_{1}\cup \{ a\ k$ long path, which is disjoint
Madarasi@19
   648
from $G_{1}$ and one of its starting points is connected to $u\in
Madarasi@19
   649
V_{1}\}$.
Madarasi@19
   650
\newline
Madarasi@19
   651
Is there a subgraph of $G_{2}$, which is isomorph with
Madarasi@19
   652
$G'_{1}$?
Madarasi@19
   653
\newline
Madarasi@19
   654
If unfortunately the nodes of the path were the first $k$ nodes in the
Madarasi@19
   655
matching order, the algorithm would iterate through all the possible k
Madarasi@19
   656
long paths in $G_{2}$, and it would recognize that no path can be
Madarasi@19
   657
extended to $G'_{1}$.
Madarasi@19
   658
\newline
Madarasi@19
   659
However, had it started by the matching of $G_{1}$, it would not
Madarasi@19
   660
have matched any nodes of the path.
Madarasi@19
   661
\end{example}
Madarasi@19
   662
Madarasi@19
   663
These examples may look artificial, but the same problems also appear
Madarasi@19
   664
in real-world instances, even though in a less obvious way.
Madarasi@19
   665
alpar@2
   666
\subsection{Preparations}
alpar@2
   667
\begin{claim}
alpar@2
   668
\label{claim:claimCoverFromLeft}
alpar@3
   669
The total ordering relation uniquely determines a node order, in which
Madarasi@19
   670
the nodes of $V_{1}$ will be covered by VF2. From the point of
alpar@3
   671
view of the matching procedure, this means, that always the same node
Madarasi@19
   672
of $G_{1}$ will be covered on the d-th level.
alpar@2
   673
\end{claim}
alpar@2
   674
alpar@2
   675
\begin{definition}
Madarasi@19
   676
An order $(u_{\sigma(1)},u_{\sigma(2)},..,u_{\sigma(|V_{1}|)})$ of
Madarasi@19
   677
$V_{1}$ is \textbf{matching order} if exists $\prec$ total
alpar@3
   678
ordering relation, s.t. the VF2 with $\prec$ on the d-th level finds
Madarasi@19
   679
pair for $u_{\sigma(d)}$ for all $d\in\{1,..,|V_{1}|\}$.
alpar@2
   680
\end{definition}
alpar@2
   681
alpar@2
   682
\begin{claim}\label{claim:MOclaim}
Madarasi@17
   683
A total ordering is matching order iff the nodes of every component
alpar@3
   684
form an interval in the node sequence, and every node connects to a
Madarasi@17
   685
previous node in its component except the first node of each component.
alpar@2
   686
\end{claim}
alpar@2
   687
alpar@3
   688
To summing up, a total ordering always uniquely determines a matching
alpar@3
   689
order, and every matching order can be determined by a total ordering,
alpar@3
   690
however, more than one different total orderings may determine the
alpar@3
   691
same matching order.
alpar@2
   692
alpar@2
   693
\subsection{Total ordering}
Madarasi@19
   694
The matching order will be searched directly.
alpar@2
   695
\begin{notation}
Madarasi@19
   696
Let \textbf{F$_\mathcal{M}$(l)}$:=|\{v\in V_{2} :
Madarasi@19
   697
l=\mathcal{L}(v)\}|-|\{u\in V_{1}\backslash \mathcal{M} : l=\mathcal{L}(u)\}|$ ,
Madarasi@19
   698
where $l$ is a label and $\mathcal{M}\subseteq V_{1}$.
alpar@2
   699
\end{notation}
alpar@2
   700
Madarasi@17
   701
\begin{definition}Let $\mathbf{arg\ max}_{f}(S) :=\{u\in S : f(u)=max_{v\in S}\{f(v)\}\}$ and $\mathbf{arg\ min}_{f}(S) := arg\ max_{-f}(S)$, where $S$ is a finite set and $f:S\longrightarrow \mathbb{R}$.
alpar@2
   702
\end{definition}
alpar@2
   703
alpar@2
   704
\begin{algorithm}
Madarasi@8
   705
\algtext*{EndIf}
Madarasi@8
   706
\algtext*{EndProcedure}
alpar@2
   707
\algtext*{EndWhile}
Madarasi@13
   708
\algtext*{EndFor}
alpar@2
   709
\caption{\hspace{0.5cm}$The\ method\ of\ VF2++\ for\ determining\ the\ node\ order$}\label{alg:VF2PPPseu}
alpar@2
   710
\begin{algorithmic}[1]
alpar@3
   711
\Procedure{VF2++order}{} \State $\mathcal{M}$ := $\emptyset$
Madarasi@19
   712
\Comment{matching order} \While{$V_{1}\backslash \mathcal{M}
alpar@3
   713
  \neq\emptyset$} \State $r\in$ arg max$_{deg}$ (arg
Madarasi@19
   714
min$_{F_\mathcal{M}\circ \mathcal{L}}(V_{1}\backslash
alpar@3
   715
\mathcal{M})$)\label{alg:findMin} \State Compute $T$, a BFS tree with
alpar@3
   716
root node $r$.  \For{$d=0,1,...,depth(T)$} \State $V_d$:=nodes of the
alpar@3
   717
$d$-th level \State Process $V_d$ \Comment{See Algorithm
Madarasi@8
   718
  \ref{alg:VF2PPProcess1}} \EndFor
alpar@3
   719
\EndWhile \EndProcedure
alpar@2
   720
\end{algorithmic}
alpar@2
   721
\end{algorithm}
alpar@2
   722
alpar@2
   723
\begin{algorithm}
Madarasi@8
   724
\algtext*{EndIf}
Madarasi@8
   725
\algtext*{EndProcedure}%ne nyomtasson ..
alpar@2
   726
\algtext*{EndWhile}
Madarasi@8
   727
\caption{\hspace{.5cm}$The\ method\ for\ processing\ a\ level\ of\ the\ BFS\ tree$}\label{alg:VF2PPProcess1}
alpar@2
   728
\begin{algorithmic}[1]
Madarasi@17
   729
\Procedure{VF2++ProcessLevel}{$V_{d}$} \While{$V_d\neq\emptyset$}
Madarasi@22
   730
\State $m\in$ arg min$_{F_{\mathcal{M}\circ\ \mathcal{L}}}($ arg max$_{deg}($arg
alpar@3
   731
max$_{Conn_{\mathcal{M}}}(V_{d})))$ \State $V_d:=V_d\backslash m$
alpar@3
   732
\State Append node $m$ to the end of $\mathcal{M}$ \State Refresh
alpar@3
   733
$F_\mathcal{M}$ \EndWhile \EndProcedure
alpar@2
   734
\end{algorithmic}
alpar@2
   735
\end{algorithm}
alpar@2
   736
alpar@4
   737
Algorithm~\ref{alg:VF2PPPseu} is a high level description of the
alpar@4
   738
matching order procedure of VF2++. It computes a BFS tree for each
Madarasi@19
   739
component in ascending order of their rarest node labels and largest $deg$,
alpar@4
   740
whose root vertex is the component's minimal
Madarasi@8
   741
node. Algorithm~\ref{alg:VF2PPProcess1} is a method to process a level of the BFS tree, which appends the nodes of the current level in descending
Madarasi@8
   742
lexicographic order by $(Conn_{\mathcal{M}},deg,-F_\mathcal{M})$ separately
Madarasi@8
   743
to $\mathcal{M}$, and refreshes $F_\mathcal{M}$ immediately.
alpar@2
   744
alpar@4
   745
Claim~\ref{claim:MOclaim} shows that Algorithm~\ref{alg:VF2PPPseu}
alpar@4
   746
provides a matching order.
alpar@2
   747
alpar@2
   748
alpar@2
   749
\subsection{Cutting rules}
alpar@2
   750
\label{VF2PPCuttingRules}
Madarasi@19
   751
This section presents the cutting rules of VF2++, which are improved by using extra information coming from the node labels.
alpar@2
   752
\begin{notation}
Madarasi@19
   753
Let $\mathbf{\Gamma_{1}^{l}(u)}:=\{\tilde{u} : \mathcal{L}(\tilde{u})=l
Madarasi@19
   754
\wedge \tilde{u}\in \Gamma_{1} (u)\}$ and
Madarasi@19
   755
$\mathbf{\Gamma_{2}^{l}(v)}:=\{\tilde{v} : \mathcal{L}(\tilde{v})=l \wedge
Madarasi@19
   756
\tilde{v}\in \Gamma_{2} (v)\}$, where $u\in V_{1}$, $v\in
Madarasi@19
   757
V_{2}$ and $l$ is a label.
alpar@2
   758
\end{notation}
alpar@2
   759
Madarasi@19
   760
\subsubsection{Induced subgraph isomorphism}
alpar@2
   761
\begin{claim}
Madarasi@19
   762
\[LabCut_{IND}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})|\!<\!|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})|\ \vee\]\[\bigvee_{l\ is\ label} \newline |\Gamma_{2}^{l}(v)\cap \tilde{T}_{2}(\mathfrak{m})| < |\Gamma_{1}^{l}(u)\cap \tilde{T}_{1}(\mathfrak{m})|\] is a cutting function by IND.
Madarasi@19
   763
\end{claim}
Madarasi@19
   764
\subsubsection{Graph isomorphism}
Madarasi@19
   765
\begin{claim}
Madarasi@19
   766
\[LabCut_{ISO}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})|\!\neq\!|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})|\  \vee\]\[\bigvee_{l\ is\ label} \newline |\Gamma_{2}^{l}(v)\cap \tilde{T}_{2}(\mathfrak{m})| \neq |\Gamma_{1}^{l}(u)\cap \tilde{T}_{1}(\mathfrak{m})|\] is a cutting function by ISO.
alpar@2
   767
\end{claim}
Madarasi@13
   768
Madarasi@19
   769
\subsubsection{Subgraph isomorphism}
Madarasi@19
   770
\begin{claim}
Madarasi@19
   771
\[LabCut_{SU\!B}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})|\!<\!|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})|\] is a cutting function by SUB.
Madarasi@19
   772
\end{claim}
alpar@2
   773
Madarasi@19
   774
Madarasi@19
   775
Madarasi@22
   776
\section{Implementation details}\label{sec:VF2ppImpl}
alpar@3
   777
This section provides a detailed summary of an efficient
alpar@3
   778
implementation of VF2++.
Madarasi@22
   779
\subsection{Storing a mapping}
alpar@3
   780
After fixing an arbitrary node order ($u_0, u_1, ..,
Madarasi@19
   781
u_{|G_{1}|-1}$) of $G_{1}$, an array $M$ is usable to store
alpar@3
   782
the current mapping in the following way.
alpar@2
   783
\[
alpar@3
   784
 M[i] =
alpar@2
   785
  \begin{cases} 
Madarasi@19
   786
   v & if\ (u_i,v)\ is\ in\ the\ mapping\\ INV\!ALI\!D &
Madarasi@17
   787
   if\ no\ node\ has\ been\ mapped\ to\ u_i,
alpar@2
   788
  \end{cases}
alpar@2
   789
\]
Madarasi@19
   790
where $i\in\{0,1, ..,|G_{1}|-1\}$, $v\in V_{2}$ and $INV\!ALI\!D$
alpar@3
   791
means "no node".
Madarasi@22
   792
\subsection{Avoiding the recurrence}
alpar@4
   793
The recursion of Algorithm~\ref{alg:VF2Pseu} can be realized
Madarasi@9
   794
as a \textit{while loop}, which has a loop counter $depth$ denoting the
Madarasi@9
   795
all-time depth of the recursion. Fixing a matching order, let $M$
Madarasi@9
   796
denote the array storing the all-time mapping. Based on Claim~\ref{claim:claimCoverFromLeft},
Madarasi@19
   797
$M$ is $INV\!ALI\!D$ from index $depth$+1 and not $INV\!ALI\!D$ before
Madarasi@9
   798
$depth$. $M[depth]$ changes
alpar@3
   799
while the state is being processed, but the property is held before
alpar@3
   800
both stepping back to a predecessor state and exploring a successor
alpar@3
   801
state.
alpar@2
   802
alpar@3
   803
The necessary part of the candidate set is easily maintainable or
alpar@3
   804
computable by following
alpar@4
   805
Section~\ref{candidateComputingVF2}. A much faster method
alpar@3
   806
has been designed for biological- and sparse graphs, see the next
alpar@3
   807
section for details.
alpar@2
   808
Madarasi@22
   809
\subsection{Calculating the candidates for a node}
alpar@4
   810
Being aware of Claim~\ref{claim:claimCoverFromLeft}, the
alpar@3
   811
task is not to maintain the candidate set, but to generate the
Madarasi@19
   812
candidate nodes in $G_{2}$ for a given node $u\in V_{1}$.  In
Madarasi@20
   813
case of any of the three problem types and a mapping $\mathfrak{m}$, if a node $v\in
Madarasi@19
   814
V_{2}$ is a potential pair of $u\in V_{1}$, then $\forall
Madarasi@20
   815
u'\in \mathfrak{D}(\mathfrak{m}) : (u,u')\in
Madarasi@20
   816
E_{1}\Rightarrow (v,\mathfrak{m}(u'))\in
Madarasi@19
   817
E_{2}$. That is, each covered neighbour of $u$ has to be mapped to
alpar@3
   818
a covered neighbour of $v$.
alpar@2
   819
alpar@3
   820
Having said that, an algorithm running in $\Theta(deg)$ time is
alpar@3
   821
describable if there exists a covered node in the component containing
Madarasi@17
   822
$u$, and a linear one otherwise.
alpar@2
   823
alpar@2
   824
Madarasi@22
   825
\subsection{Determining the node order}
alpar@3
   826
This section describes how the node order preprocessing method of
alpar@3
   827
VF2++ can efficiently be implemented.
alpar@2
   828
alpar@3
   829
For using lookup tables, the node labels are associated with the
alpar@3
   830
numbers $\{0,1,..,|K|-1\}$, where $K$ is the set of the labels. It
Madarasi@9
   831
enables $F_\mathcal{M}$ to be stored in an array. At first, the node order
alpar@3
   832
$\mathcal{M}=\emptyset$, so $F_\mathcal{M}[i]$ is the number of nodes
Madarasi@19
   833
in $V_{1}$ having label $i$, which is easy to compute in
Madarasi@19
   834
$\Theta(|V_{1}|)$ steps.
alpar@2
   835
Madarasi@19
   836
Representing $\mathcal{M}\subseteq V_{1}$ as an array of
Madarasi@19
   837
size $|V_{1}|$, both the computation of the BFS tree, and processing its levels by Algorithm~\ref{alg:VF2PPProcess1} can be done inplace by swapping nodes.
alpar@2
   838
Madarasi@22
   839
\subsection{Cutting rules}
alpar@4
   840
In Section~\ref{VF2PPCuttingRules}, the cutting rules were
Madarasi@19
   841
described using the sets $T_{1}$, $T_{2}$, $\tilde T_{1}$
Madarasi@19
   842
and $\tilde T_{2}$, which are dependent on the all-time mapping
alpar@3
   843
(i.e. on the all-time state). The aim is to check the labeled cutting
alpar@3
   844
rules of VF2++ in $\Theta(deg)$ time.
alpar@2
   845
alpar@3
   846
Firstly, suppose that these four sets are given in such a way, that
alpar@3
   847
checking whether a node is in a certain set takes constant time,
alpar@3
   848
e.g. they are given by their 0-1 characteristic vectors. Let $L$ be an
alpar@3
   849
initially zero integer lookup table of size $|K|$. After incrementing
Madarasi@19
   850
$L[\mathcal{L}(u')]$ for all $u'\in \Gamma_{1}(u) \cap T_{1}(\mathfrak{m})$ and
Madarasi@19
   851
decrementing $L[\mathcal{L}(v')]$ for all $v'\in\Gamma_{2} (v) \cap
Madarasi@19
   852
T_{2}(s)$, the first part of the cutting rules is checkable in
alpar@3
   853
$\Theta(deg)$ time by considering the proper signs of $L$. Setting $L$
alpar@3
   854
to zero takes $\Theta(deg)$ time again, which makes it possible to use
Madarasi@9
   855
the same table through the whole algorithm. The second part of the
alpar@3
   856
cutting rules can be verified using the same method with $\tilde
Madarasi@19
   857
T_{1}$ and $\tilde T_{2}$ instead of $T_{1}$ and
Madarasi@19
   858
$T_{2}$. Thus, the overall complexity is $\Theta(deg)$.
alpar@2
   859
Madarasi@19
   860
Another integer lookup table storing the number of covered neighbours
Madarasi@19
   861
of each node in $G_{2}$ gives all the information about the sets
Madarasi@19
   862
$T_{2}$ and $\tilde T_{2}$, which is maintainable in
alpar@3
   863
$\Theta(deg)$ time when a pair is added or substracted by incrementing
alpar@3
   864
or decrementing the proper indices. A further improvement is that the
Madarasi@19
   865
values of $L[\mathcal{L}(u')]$ in case of checking $u$ are dependent only on
Madarasi@19
   866
$u$, i.e. on the size of the mapping, so for each $u\in V_{1}$ an
alpar@3
   867
array of pairs (label, number of such labels) can be stored to skip
alpar@3
   868
the maintaining operations. Note that these arrays are at most of size
Madarasi@19
   869
$deg$.
alpar@2
   870
Madarasi@19
   871
Using similar techniques, the consistency function can be evaluated in
alpar@3
   872
$\Theta(deg)$ steps, as well.
alpar@2
   873
Madarasi@22
   874
\section{Experimental results}\label{sec:ExpRes}
Madarasi@19
   875
This section compares the performance of VF2++ and VF2 Plus. According to
Madarasi@19
   876
our experience, both algorithms run faster than VF2 with orders of
Madarasi@19
   877
magnitude, thus its inclusion was not reasonable.
alpar@2
   878
Madarasi@19
   879
The algorithms were implemented in C++ using the open source
Madarasi@19
   880
LEMON graph and network optimization library\cite{LEMON}. The test were carried out on a linux based system with an Intel i7 X980 3.33 GHz CPU and 6 GB of RAM.
alpar@2
   881
\subsection{Biological graphs}
alpar@3
   882
The tests have been executed on a recent biological dataset created
alpar@3
   883
for the International Contest on Pattern Search in Biological
Madarasi@7
   884
Databases\cite{Content}, which has been constructed of molecule,
Madarasi@7
   885
protein and contact map graphs extracted from the Protein Data
alpar@3
   886
Bank\cite{ProteinDataBank}.
alpar@2
   887
alpar@3
   888
The molecule dataset contains small graphs with less than 100 nodes
alpar@3
   889
and an average degree of less than 3. The protein dataset contains
alpar@3
   890
graphs having 500-10 000 nodes and an average degree of 4, while the
alpar@3
   891
contact map dataset contains graphs with 150-800 nodes and an average
alpar@3
   892
degree of 20.  \\
alpar@2
   893
Madarasi@19
   894
In the following, both the induced subgraph isomorphism and the graph
alpar@3
   895
isomorphism will be examined.
alpar@2
   896
Madarasi@17
   897
This dataset provides graph pairs, between which all the induced subgraph isomorphisms have to be found. For runtime results, please see Figure~\ref{fig:bioIND}.
Madarasi@7
   898
Madarasi@7
   899
In an other experiment, the nodes of each graph in the database had been
Madarasi@7
   900
shuffled, and an isomorphism between the shuffled and the original
Madarasi@7
   901
graph was searched. The solution times are shown on Figure~\ref{fig:bioISO}.
Madarasi@7
   902
Madarasi@7
   903
Madarasi@17
   904
\begin{figure}[H]
Madarasi@17
   905
\vspace*{-2cm}
Madarasi@17
   906
\hspace*{-1.5cm}
Madarasi@17
   907
\begin{subfigure}[b]{0.55\textwidth}
Madarasi@17
   908
\begin{figure}[H]
Madarasi@17
   909
\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@17
   910
\begin{axis}[title=Molecules IND,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@17
   911
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@17
   912
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@17
   913
  format/1000 sep = \thinspace}]
Madarasi@17
   914
%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@17
   915
\addplot table {Orig/Molecules.32.txt}; \addplot[mark=triangle*,mark
Madarasi@17
   916
  size=1.8pt,color=red] table {VF2PPLabel/Molecules.32.txt};
Madarasi@17
   917
\end{axis}
Madarasi@17
   918
\end{tikzpicture}
Madarasi@17
   919
\caption{In the case of molecules, the algorithms have
Madarasi@17
   920
  similar behaviour, but VF2++ is almost two times faster even on such
Madarasi@17
   921
  small graphs.} \label{fig:INDMolecule}
Madarasi@17
   922
\end{figure}
Madarasi@17
   923
\end{subfigure}
Madarasi@17
   924
\hspace*{1.5cm}
Madarasi@17
   925
\begin{subfigure}[b]{0.55\textwidth}
Madarasi@17
   926
\begin{figure}[H]
Madarasi@17
   927
\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@17
   928
\begin{axis}[title=Contact maps IND,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@17
   929
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@17
   930
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@17
   931
  format/1000 sep = \thinspace}]
Madarasi@17
   932
%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@17
   933
\addplot table {Orig/ContactMaps.128.txt};
Madarasi@17
   934
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
Madarasi@17
   935
        {VF2PPLabel/ContactMaps.128.txt};
Madarasi@17
   936
\end{axis}
Madarasi@17
   937
\end{tikzpicture}
Madarasi@17
   938
\caption{On contact maps, VF2++ runs almost in constant time, while VF2
Madarasi@17
   939
  Plus has a near linear behaviour.} \label{fig:INDContact}
Madarasi@17
   940
\end{figure}
Madarasi@17
   941
\end{subfigure}
Madarasi@17
   942
Madarasi@17
   943
\begin{center}
Madarasi@17
   944
\vspace*{-0.5cm}
Madarasi@17
   945
\begin{subfigure}[b]{0.55\textwidth}
Madarasi@17
   946
\begin{figure}[H]
Madarasi@17
   947
\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@17
   948
  \begin{axis}[title=Proteins IND,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@17
   949
  =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@17
   950
    west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@17
   951
    format/1000 sep = \thinspace}] %\addplot+[only marks] table
Madarasi@17
   952
    {proteinsOrig.txt}; \addplot[mark=*,mark size=1.2pt,color=blue]
Madarasi@17
   953
    table {Orig/Proteins.256.txt}; \addplot[mark=triangle*,mark
Madarasi@17
   954
      size=1.8pt,color=red] table {VF2PPLabel/Proteins.256.txt};
Madarasi@17
   955
  \end{axis}
Madarasi@17
   956
  \end{tikzpicture}
Madarasi@17
   957
\caption{Both the algorithms have linear behaviour on protein
Madarasi@17
   958
  graphs. VF2++ is more than 10 times faster than VF2
Madarasi@17
   959
  Plus.} \label{fig:INDProt}
Madarasi@17
   960
\end{figure}
Madarasi@17
   961
\end{subfigure}
Madarasi@17
   962
\end{center}
Madarasi@17
   963
\vspace*{-0.5cm}
Madarasi@17
   964
\caption{\normalsize{Induced subgraph isomorphism on biological graphs}}\label{fig:bioIND}
Madarasi@17
   965
\end{figure}
Madarasi@17
   966
alpar@2
   967
alpar@2
   968
\begin{figure}[H]
Madarasi@7
   969
\vspace*{-2cm}
Madarasi@7
   970
\hspace*{-1.5cm}
Madarasi@7
   971
\begin{subfigure}[b]{0.55\textwidth}
Madarasi@7
   972
\begin{figure}[H]
Madarasi@7
   973
\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@7
   974
\begin{axis}[title=Molecules ISO,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@7
   975
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@7
   976
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@7
   977
  format/1000 sep = \thinspace}]
Madarasi@7
   978
%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@7
   979
\addplot table {Orig/moleculesIso.txt}; \addplot[mark=triangle*,mark
Madarasi@7
   980
  size=1.8pt,color=red] table {VF2PPLabel/moleculesIso.txt};
Madarasi@7
   981
\end{axis}
Madarasi@7
   982
\end{tikzpicture}
Madarasi@7
   983
\caption{In the case of molecules, there is not such a significant
Madarasi@7
   984
  difference, but VF2++ seems to be faster as the number of nodes
Madarasi@7
   985
  increases.}\label{fig:ISOMolecule}
Madarasi@7
   986
\end{figure}
Madarasi@7
   987
\end{subfigure}
Madarasi@7
   988
\hspace*{1.5cm}
Madarasi@7
   989
\begin{subfigure}[b]{0.55\textwidth}
Madarasi@7
   990
\begin{figure}[H]
Madarasi@7
   991
\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@7
   992
\begin{axis}[title=Contact maps ISO,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@7
   993
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@7
   994
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@7
   995
  format/1000 sep = \thinspace}]
Madarasi@7
   996
%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@7
   997
\addplot table {Orig/contactMapsIso.txt}; \addplot[mark=triangle*,mark
Madarasi@7
   998
  size=1.8pt,color=red] table {VF2PPLabel/contactMapsIso.txt};
Madarasi@7
   999
\end{axis}
Madarasi@7
  1000
\end{tikzpicture}
Madarasi@7
  1001
\caption{The results are closer to each other on contact maps, but
Madarasi@7
  1002
  VF2++ still performs consistently better.}\label{fig:ISOContact}
Madarasi@7
  1003
\end{figure}
Madarasi@7
  1004
\end{subfigure}
Madarasi@7
  1005
alpar@2
  1006
\begin{center}
Madarasi@7
  1007
\vspace*{-0.5cm}
Madarasi@7
  1008
\begin{subfigure}[b]{0.55\textwidth}
Madarasi@7
  1009
\begin{figure}[H]
Madarasi@7
  1010
\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@7
  1011
\begin{axis}[title=Proteins ISO,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@7
  1012
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@7
  1013
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@7
  1014
  format/1000 sep = \thinspace}]
Madarasi@7
  1015
%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@7
  1016
\addplot table {Orig/proteinsIso.txt}; \addplot[mark=triangle*,mark
Madarasi@7
  1017
  size=1.8pt,color=red] table {VF2PPLabel/proteinsIso.txt};
Madarasi@7
  1018
\end{axis}
Madarasi@7
  1019
\end{tikzpicture}
Madarasi@7
  1020
\caption{On protein graphs, VF2 Plus has a super linear time
Madarasi@7
  1021
  complexity, while VF2++ runs in near constant time. The difference
Madarasi@7
  1022
  is about two order of magnitude on large graphs.}\label{fig:ISOProt}
Madarasi@7
  1023
\end{figure}
Madarasi@7
  1024
\end{subfigure}
Madarasi@7
  1025
\end{center}
Madarasi@7
  1026
\vspace*{-0.6cm}
Madarasi@17
  1027
\caption{\normalsize{Graph isomorphism on biological graphs}}\label{fig:bioISO}
Madarasi@7
  1028
\end{figure}
Madarasi@7
  1029
Madarasi@7
  1030
alpar@2
  1031
alpar@2
  1032
alpar@2
  1033
\subsection{Random graphs}
alpar@3
  1034
This section compares VF2++ with VF2 Plus on random graphs of a large
alpar@3
  1035
size. The node labels are uniformly distributed.  Let $\delta$ denote
alpar@3
  1036
the average degree.  For the parameters of problems solved in the
alpar@3
  1037
experiments, please see the top of each chart.
alpar@2
  1038
\subsubsection{Graph isomorphism}
alpar@3
  1039
To evaluate the efficiency of the algorithms in the case of graph
Madarasi@17
  1040
isomorphism, random connected graphs of less than 20 000 nodes have been
alpar@3
  1041
considered. Generating a random graph and shuffling its nodes, an
Madarasi@7
  1042
isomorphism had to be found. Figure \ref{fig:randISO} shows the runtime results
alpar@4
  1043
on graph sets of various density.
alpar@2
  1044
Madarasi@7
  1045
Madarasi@7
  1046
Madarasi@7
  1047
Madarasi@12
  1048
\begin{figure}
Madarasi@7
  1049
\vspace*{-1.5cm}
Madarasi@7
  1050
\hspace*{-1.5cm}
Madarasi@7
  1051
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1052
\begin{center}
alpar@2
  1053
\begin{tikzpicture}
Madarasi@7
  1054
\begin{axis}[title={Random ISO, $\delta = 5$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1055
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1056
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@7
  1057
  format/1000 sep = \space}]
alpar@2
  1058
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1059
\addplot table {randGraph/iso/vf2pIso5_1.txt};
alpar@3
  1060
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1061
        {randGraph/iso/vf2ppIso5_1.txt};
alpar@2
  1062
\end{axis}
alpar@2
  1063
\end{tikzpicture}
alpar@2
  1064
\end{center}
Madarasi@7
  1065
\end{subfigure}
Madarasi@7
  1066
%\hspace{1cm}
Madarasi@7
  1067
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1068
\begin{center}
alpar@2
  1069
\begin{tikzpicture}
Madarasi@7
  1070
\begin{axis}[title={Random ISO, $\delta = 10$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1071
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1072
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@7
  1073
  format/1000 sep = \space}]
alpar@2
  1074
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1075
\addplot table {randGraph/iso/vf2pIso10_1.txt};
alpar@3
  1076
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1077
        {randGraph/iso/vf2ppIso10_1.txt};
alpar@2
  1078
\end{axis}
alpar@2
  1079
\end{tikzpicture}
alpar@2
  1080
\end{center}
Madarasi@7
  1081
\end{subfigure}
Madarasi@7
  1082
%%\hspace{1cm}
Madarasi@7
  1083
\hspace*{-1.5cm}
Madarasi@7
  1084
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1085
\begin{center}
alpar@2
  1086
\begin{tikzpicture}
Madarasi@7
  1087
\begin{axis}[title={Random ISO, $\delta = 15$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1088
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1089
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@7
  1090
  format/1000 sep = \space}]
alpar@2
  1091
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1092
\addplot table {randGraph/iso/vf2pIso15_1.txt};
alpar@3
  1093
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1094
        {randGraph/iso/vf2ppIso15_1.txt};
alpar@2
  1095
\end{axis}
alpar@2
  1096
\end{tikzpicture}
alpar@2
  1097
\end{center}
Madarasi@7
  1098
     \end{subfigure}
Madarasi@7
  1099
     \begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1100
\begin{center}
alpar@2
  1101
\begin{tikzpicture}
Madarasi@7
  1102
\begin{axis}[title={Random ISO, $\delta = 35$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1103
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1104
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@7
  1105
  format/1000 sep = \space}]
alpar@2
  1106
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1107
\addplot table {randGraph/iso/vf2pIso35_1.txt};
alpar@3
  1108
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1109
        {randGraph/iso/vf2ppIso35_1.txt};
alpar@2
  1110
\end{axis}
alpar@2
  1111
\end{tikzpicture}
alpar@2
  1112
\end{center}
Madarasi@7
  1113
\end{subfigure}
Madarasi@7
  1114
\begin{subfigure}[b]{0.55\textwidth}
Madarasi@7
  1115
\hspace*{-1.5cm}
alpar@2
  1116
\begin{tikzpicture}
Madarasi@7
  1117
\begin{axis}[title={Random ISO, $\delta = 45$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1118
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1119
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@7
  1120
  format/1000 sep = \space}]
alpar@2
  1121
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1122
\addplot table {randGraph/iso/vf2pIso45_1.txt};
alpar@3
  1123
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1124
        {randGraph/iso/vf2ppIso45_1.txt};
alpar@2
  1125
\end{axis}
alpar@2
  1126
\end{tikzpicture}
Madarasi@7
  1127
\end{subfigure}
Madarasi@7
  1128
\hspace*{-1.5cm}
Madarasi@7
  1129
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1130
\begin{tikzpicture}
Madarasi@7
  1131
\begin{axis}[title={Random ISO, $\delta = 100$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1132
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1133
  west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3
  1134
  format/1000 sep = \thinspace}]
alpar@2
  1135
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1136
\addplot table {randGraph/iso/vf2pIso100_1.txt};
alpar@3
  1137
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1138
        {randGraph/iso/vf2ppIso100_1.txt};
alpar@2
  1139
\end{axis}
alpar@2
  1140
\end{tikzpicture}
Madarasi@7
  1141
\end{subfigure}
alpar@2
  1142
\vspace*{-0.8cm}
Madarasi@7
  1143
\caption{IND on graphs having an average degree of
Madarasi@7
  1144
  5.}\label{fig:randISO}
alpar@2
  1145
\end{figure}
alpar@2
  1146
alpar@2
  1147
alpar@2
  1148
\subsubsection{Induced subgraph isomorphism}
Madarasi@17
  1149
This section presents a comparison of VF2++ and VF2 Plus in the case
alpar@3
  1150
of induced subgraph isomorphism. In addition to the size of the large
alpar@3
  1151
graph, that of the small graph dramatically influences the hardness of
alpar@3
  1152
a given problem too, so the overall picture is provided by examining
alpar@3
  1153
small graphs of various size.
alpar@2
  1154
Madarasi@17
  1155
For each chart, a number $0<\rho< 1$ has been fixed, and the following
Madarasi@19
  1156
has been executed 150 times. Generating a large graph $G_{2}$ of an average degree of $\delta$,
Madarasi@19
  1157
choose 10 of its induced subgraphs having $\rho\ |V_{2}|$ nodes,
alpar@3
  1158
and for all the 10 subgraphs find a mapping by using both the graph
alpar@3
  1159
matching algorithms.  The $\delta = 5, 10, 35$ and $\rho = 0.05, 0.1,
Madarasi@10
  1160
0.3, 0.6, 0.8, 0.95$ cases have been examined, see
alpar@4
  1161
Figure~\ref{fig:randIND5}, \ref{fig:randIND10} and
Madarasi@10
  1162
\ref{fig:randIND35}.
alpar@2
  1163
alpar@2
  1164
alpar@2
  1165
alpar@2
  1166
alpar@2
  1167
Madarasi@12
  1168
\begin{figure}
Madarasi@7
  1169
\vspace*{-1.5cm}
Madarasi@7
  1170
\hspace*{-1.5cm}
alpar@2
  1171
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1172
\begin{center}
alpar@2
  1173
\begin{tikzpicture}
alpar@2
  1174
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1175
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1176
  west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3
  1177
  format/1000 sep = \space}]
alpar@2
  1178
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1179
\addplot table {randGraph/ind/vf2pInd5_0.05.txt};
alpar@3
  1180
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1181
        {randGraph/ind/vf2ppInd5_0.05.txt};
alpar@2
  1182
\end{axis}
alpar@2
  1183
\end{tikzpicture}
alpar@2
  1184
\end{center}
alpar@2
  1185
     \end{subfigure}
alpar@2
  1186
     \begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1187
\begin{center}
alpar@2
  1188
\begin{tikzpicture}
alpar@2
  1189
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1190
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1191
  west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3
  1192
  format/1000 sep = \space}]
alpar@2
  1193
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1194
\addplot table {randGraph/ind/vf2pInd5_0.1.txt};
alpar@3
  1195
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1196
        {randGraph/ind/vf2ppInd5_0.1.txt};
alpar@2
  1197
\end{axis}
alpar@2
  1198
\end{tikzpicture}
alpar@2
  1199
\end{center}
alpar@2
  1200
\end{subfigure}
Madarasi@7
  1201
\hspace*{-1.5cm}
alpar@2
  1202
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1203
\begin{center}
alpar@2
  1204
\begin{tikzpicture}
alpar@2
  1205
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1206
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1207
  west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3
  1208
  format/1000 sep = \space}]
alpar@2
  1209
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1210
\addplot table {randGraph/ind/vf2pInd5_0.3.txt};
alpar@3
  1211
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1212
        {randGraph/ind/vf2ppInd5_0.3.txt};
alpar@2
  1213
\end{axis}
alpar@2
  1214
\end{tikzpicture}
alpar@2
  1215
\end{center}
alpar@2
  1216
     \end{subfigure}
alpar@2
  1217
     \begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1218
\begin{center}
alpar@2
  1219
\begin{tikzpicture}
alpar@2
  1220
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.6$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1221
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1222
  west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3
  1223
  format/1000 sep = \space}]
alpar@2
  1224
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1225
\addplot table {randGraph/ind/vf2pInd5_0.6.txt};
alpar@3
  1226
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1227
        {randGraph/ind/vf2ppInd5_0.6.txt};
alpar@2
  1228
\end{axis}
alpar@2
  1229
\end{tikzpicture}
alpar@2
  1230
\end{center}
alpar@2
  1231
\end{subfigure}
alpar@2
  1232
\begin{subfigure}[b]{0.55\textwidth}
Madarasi@7
  1233
\hspace*{-1.5cm}
alpar@2
  1234
\begin{tikzpicture}
alpar@2
  1235
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1236
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1237
  west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3
  1238
  format/1000 sep = \space}]
alpar@2
  1239
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1240
\addplot table {randGraph/ind/vf2pInd5_0.8.txt};
alpar@3
  1241
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1242
        {randGraph/ind/vf2ppInd5_0.8.txt};
alpar@2
  1243
\end{axis}
alpar@2
  1244
\end{tikzpicture}
alpar@2
  1245
     \end{subfigure}
Madarasi@7
  1246
     \hspace*{-1.5cm}
alpar@2
  1247
     \begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1248
\begin{tikzpicture}
alpar@2
  1249
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.95$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1250
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1251
  west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3
  1252
  format/1000 sep = \thinspace}]
alpar@2
  1253
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1254
\addplot table {randGraph/ind/vf2pInd5_0.95.txt};
alpar@3
  1255
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1256
        {randGraph/ind/vf2ppInd5_0.95.txt};
alpar@2
  1257
\end{axis}
alpar@2
  1258
\end{tikzpicture}
alpar@2
  1259
\end{subfigure}
alpar@2
  1260
\vspace*{-0.8cm}
alpar@3
  1261
\caption{IND on graphs having an average degree of
alpar@3
  1262
  5.}\label{fig:randIND5}
alpar@2
  1263
\end{figure}
alpar@2
  1264
alpar@2
  1265
alpar@2
  1266
\begin{figure}[H]
Madarasi@7
  1267
\vspace*{-1.5cm}
Madarasi@7
  1268
\hspace*{-1.5cm}
alpar@2
  1269
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1270
\begin{center}
Madarasi@7
  1271
\hspace*{-0.5cm}
alpar@2
  1272
\begin{tikzpicture}
alpar@2
  1273
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1274
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1275
  west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3
  1276
  format/1000 sep = \space}]
alpar@2
  1277
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1278
\addplot table {randGraph/ind/vf2pInd10_0.05.txt};
alpar@3
  1279
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1280
        {randGraph/ind/vf2ppInd10_0.05.txt};
alpar@2
  1281
\end{axis}
alpar@2
  1282
\end{tikzpicture}
alpar@2
  1283
\end{center}
alpar@2
  1284
     \end{subfigure}
alpar@2
  1285
     \begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1286
\begin{center}
Madarasi@7
  1287
     \hspace*{-0.5cm}
alpar@2
  1288
\begin{tikzpicture}
alpar@2
  1289
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1290
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1291
  west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3
  1292
  format/1000 sep = \space}]
alpar@2
  1293
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1294
\addplot table {randGraph/ind/vf2pInd10_0.1.txt};
alpar@3
  1295
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1296
        {randGraph/ind/vf2ppInd10_0.1.txt};
alpar@2
  1297
\end{axis}
alpar@2
  1298
\end{tikzpicture}
alpar@2
  1299
\end{center}
alpar@2
  1300
\end{subfigure}
Madarasi@7
  1301
\hspace*{-1.5cm}
alpar@2
  1302
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1303
\begin{center}
alpar@2
  1304
\begin{tikzpicture}
alpar@2
  1305
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1306
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1307
  west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3
  1308
  format/1000 sep = \space}]
alpar@2
  1309
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1310
\addplot table {randGraph/ind/vf2pInd10_0.3.txt};
alpar@3
  1311
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1312
        {randGraph/ind/vf2ppInd10_0.3.txt};
alpar@2
  1313
\end{axis}
alpar@2
  1314
\end{tikzpicture}
alpar@2
  1315
\end{center}
alpar@2
  1316
     \end{subfigure}
alpar@2
  1317
     \begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1318
\begin{center}
alpar@2
  1319
\begin{tikzpicture}
alpar@2
  1320
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.6$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1321
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1322
  west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3
  1323
  format/1000 sep = \space}]
alpar@2
  1324
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1325
\addplot table {randGraph/ind/vf2pInd10_0.6.txt};
alpar@3
  1326
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1327
        {randGraph/ind/vf2ppInd10_0.6.txt};
alpar@2
  1328
\end{axis}
alpar@2
  1329
\end{tikzpicture}
alpar@2
  1330
\end{center}
alpar@2
  1331
\end{subfigure}
Madarasi@7
  1332
\hspace*{-1.5cm}
alpar@2
  1333
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1334
\begin{tikzpicture}
alpar@2
  1335
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1336
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1337
  west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3
  1338
  format/1000 sep = \space}]
alpar@2
  1339
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1340
\addplot table {randGraph/ind/vf2pInd10_0.8.txt};
alpar@3
  1341
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1342
        {randGraph/ind/vf2ppInd10_0.8.txt};
alpar@2
  1343
\end{axis}
alpar@2
  1344
\end{tikzpicture}
alpar@2
  1345
     \end{subfigure}
alpar@2
  1346
     \begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1347
\begin{tikzpicture}
alpar@2
  1348
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.95$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1349
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1350
  west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3
  1351
  format/1000 sep = \thinspace}]
alpar@2
  1352
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1353
\addplot table {randGraph/ind/vf2pInd10_0.95.txt};
alpar@3
  1354
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1355
        {randGraph/ind/vf2ppInd10_0.95.txt};
alpar@2
  1356
\end{axis}
alpar@2
  1357
\end{tikzpicture}
alpar@2
  1358
\end{subfigure}
alpar@2
  1359
\vspace*{-0.8cm}
alpar@3
  1360
\caption{IND on graphs having an average degree of
alpar@3
  1361
  10.}\label{fig:randIND10}
alpar@2
  1362
\end{figure}
alpar@2
  1363
alpar@2
  1364
alpar@2
  1365
alpar@2
  1366
\begin{figure}[H]
Madarasi@7
  1367
\vspace*{-1.5cm}
Madarasi@7
  1368
\hspace*{-1.5cm}
alpar@2
  1369
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1370
\begin{center}
alpar@2
  1371
\begin{tikzpicture}
alpar@2
  1372
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1373
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1374
  west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3
  1375
  format/1000 sep = \space}]
alpar@2
  1376
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1377
\addplot table {randGraph/ind/vf2pInd35_0.05.txt};
alpar@3
  1378
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1379
        {randGraph/ind/vf2ppInd35_0.05.txt};
alpar@2
  1380
\end{axis}
alpar@2
  1381
\end{tikzpicture}
alpar@2
  1382
\end{center}
alpar@2
  1383
     \end{subfigure}
alpar@2
  1384
     \begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1385
\begin{center}
alpar@2
  1386
\begin{tikzpicture}
alpar@2
  1387
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1388
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1389
  west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3
  1390
  format/1000 sep = \space}]
alpar@2
  1391
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1392
\addplot table {randGraph/ind/vf2pInd35_0.1.txt};
alpar@3
  1393
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1394
        {randGraph/ind/vf2ppInd35_0.1.txt};
alpar@2
  1395
\end{axis}
alpar@2
  1396
\end{tikzpicture}
alpar@2
  1397
\end{center}
alpar@2
  1398
\end{subfigure}
Madarasi@7
  1399
\hspace*{-1.5cm}
alpar@2
  1400
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1401
\begin{center}
alpar@2
  1402
\begin{tikzpicture}
alpar@2
  1403
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1404
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1405
  west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3
  1406
  format/1000 sep = \space}]
alpar@2
  1407
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1408
\addplot table {randGraph/ind/vf2pInd35_0.3.txt};
alpar@3
  1409
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1410
        {randGraph/ind/vf2ppInd35_0.3.txt};
alpar@2
  1411
\end{axis}
alpar@2
  1412
\end{tikzpicture}
alpar@2
  1413
\end{center}
alpar@2
  1414
     \end{subfigure}
alpar@2
  1415
     \begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1416
\begin{center}
alpar@2
  1417
\begin{tikzpicture}
alpar@2
  1418
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.6$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1419
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1420
  west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3
  1421
  format/1000 sep = \space}]
alpar@2
  1422
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1423
\addplot table {randGraph/ind/vf2pInd35_0.6.txt};
alpar@3
  1424
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1425
        {randGraph/ind/vf2ppInd35_0.6.txt};
alpar@2
  1426
\end{axis}
alpar@2
  1427
\end{tikzpicture}
alpar@2
  1428
\end{center}
alpar@2
  1429
\end{subfigure}
Madarasi@7
  1430
\hspace*{-1.5cm}
alpar@2
  1431
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1432
\begin{tikzpicture}
alpar@2
  1433
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1434
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1435
  west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3
  1436
  format/1000 sep = \space}]
alpar@2
  1437
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1438
\addplot table {randGraph/ind/vf2pInd35_0.8.txt};
alpar@3
  1439
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1440
        {randGraph/ind/vf2ppInd35_0.8.txt};
alpar@2
  1441
\end{axis}
alpar@2
  1442
\end{tikzpicture}
alpar@2
  1443
     \end{subfigure}
alpar@2
  1444
     \begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1445
\begin{tikzpicture}
alpar@2
  1446
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.95$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1447
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1448
  west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3
  1449
  format/1000 sep = \thinspace}]
alpar@2
  1450
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1451
\addplot table {randGraph/ind/vf2pInd35_0.95.txt};
alpar@3
  1452
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1453
        {randGraph/ind/vf2ppInd35_0.95.txt};
alpar@2
  1454
\end{axis}
alpar@2
  1455
\end{tikzpicture}
alpar@2
  1456
\end{subfigure}
alpar@2
  1457
\vspace*{-0.8cm}
alpar@3
  1458
\caption{IND on graphs having an average degree of
alpar@3
  1459
  35.}\label{fig:randIND35}
alpar@2
  1460
\end{figure}
alpar@2
  1461
alpar@2
  1462
alpar@3
  1463
Based on these experiments, VF2++ is faster than VF2 Plus and able to
alpar@3
  1464
handle really large graphs in milliseconds. Note that when $IND$ was
alpar@3
  1465
considered and the small graphs had proportionally few nodes ($\rho =
alpar@3
  1466
0.05$, or $\rho = 0.1$), then VF2 Plus produced some inefficient node
alpar@4
  1467
orders (e.g. see the $\delta=10$ case on
Madarasi@17
  1468
Figure~\ref{fig:randIND10}). If these instances had been excluded, the
alpar@3
  1469
charts would have seemed to be similar to the other ones.
alpar@3
  1470
Unsurprisingly, as denser graphs are considered, both VF2++ and VF2
alpar@3
  1471
Plus slow slightly down, but remain practically usable even on graphs
alpar@3
  1472
having 10 000 nodes.
alpar@2
  1473
alpar@2
  1474
alpar@2
  1475
alpar@2
  1476
alpar@3
  1477
alpar@2
  1478
\section{Conclusion}
Madarasi@19
  1479
This paper presented VF2++, a new graph matching algorithm based on VF2, called VF2++, and analyzed it from a practical viewpoint.
alpar@2
  1480
alpar@3
  1481
Recognizing the importance of the node order and determining an
alpar@3
  1482
efficient one, VF2++ is able to match graphs of thousands of nodes in
alpar@3
  1483
near practically linear time including preprocessing. In addition to
alpar@3
  1484
the proper order, VF2++ uses more efficient consistency and cutting
alpar@3
  1485
rules which are easy to compute and make the algorithm able to prune
alpar@3
  1486
most of the unfruitful branches without going astray.
alpar@2
  1487
alpar@3
  1488
In order to show the efficiency of the new method, it has been
Madarasi@19
  1489
compared to VF2 Plus\cite{VF2Plus}, which is the best contemporary algorithm.
Madarasi@19
  1490
.
alpar@2
  1491
alpar@3
  1492
The experiments show that VF2++ consistently outperforms VF2 Plus on
alpar@3
  1493
biological graphs. It seems to be asymptotically faster on protein and
alpar@3
  1494
on contact map graphs in the case of induced subgraph isomorphism,
alpar@3
  1495
while in the case of graph isomorphism, it has definitely better
alpar@3
  1496
asymptotic behaviour on protein graphs.
alpar@2
  1497
alpar@3
  1498
Regarding random sparse graphs, not only has VF2++ proved itself to be
Madarasi@19
  1499
faster than VF2 Plus, but it also has a practically linear behaviour both
Madarasi@19
  1500
in the case of induced subgraph- and graph isomorphism.
alpar@2
  1501
alpar@2
  1502
alpar@0
  1503
alpar@0
  1504
%% The Appendices part is started with the command \appendix;
alpar@0
  1505
%% appendix sections are then done as normal sections
alpar@0
  1506
%% \appendix
alpar@0
  1507
alpar@0
  1508
%% \section{}
alpar@0
  1509
%% \label{}
alpar@0
  1510
alpar@0
  1511
%% If you have bibdatabase file and want bibtex to generate the
alpar@0
  1512
%% bibitems, please use
alpar@0
  1513
%%
alpar@3
  1514
\bibliographystyle{elsarticle-num} \bibliography{bibliography}
alpar@0
  1515
alpar@0
  1516
%% else use the following coding to input the bibitems directly in the
alpar@0
  1517
%% TeX file.
alpar@0
  1518
alpar@2
  1519
%% \begin{thebibliography}{00}
alpar@0
  1520
alpar@2
  1521
%% %% \bibitem{label}
alpar@2
  1522
%% %% Text of bibliographic item
alpar@0
  1523
alpar@2
  1524
%% \bibitem{}
alpar@0
  1525
alpar@2
  1526
%% \end{thebibliography}
alpar@2
  1527
alpar@0
  1528
\end{document}
alpar@0
  1529
\endinput
alpar@0
  1530
%%
alpar@0
  1531
%% End of file `elsarticle-template-num.tex'.