damecco.tex
author Alpar Juttner <alpar@cs.elte.hu>
Fri, 22 Sep 2017 12:52:55 +0200
changeset 29 0ff72a828b16
parent 28 523fddfd7a01
permissions -rw-r--r--
Spellchecking and some rewording
alpar@0
     1
%% 
alpar@0
     2
%% Copyright 2007, 2008, 2009 Elsevier Ltd
alpar@0
     3
%% 
alpar@0
     4
%% This file is part of the 'Elsarticle Bundle'.
alpar@0
     5
%% ---------------------------------------------
alpar@0
     6
%% 
alpar@0
     7
%% It may be distributed under the conditions of the LaTeX Project Public
alpar@0
     8
%% License, either version 1.2 of this license or (at your option) any
alpar@0
     9
%% later version.  The latest version of this license is in
alpar@0
    10
%%    http://www.latex-project.org/lppl.txt
alpar@0
    11
%% and version 1.2 or later is part of all distributions of LaTeX
alpar@0
    12
%% version 1999/12/01 or later.
alpar@0
    13
%% 
alpar@0
    14
%% The list of all files belonging to the 'Elsarticle Bundle' is
alpar@0
    15
%% given in the file `manifest.txt'.
alpar@0
    16
%% 
alpar@0
    17
alpar@0
    18
%% Template article for Elsevier's document class `elsarticle'
alpar@0
    19
%% with numbered style bibliographic references
alpar@0
    20
%% SP 2008/03/01
alpar@0
    21
alpar@0
    22
\documentclass[preprint,12pt]{elsarticle}
alpar@0
    23
alpar@0
    24
%% Use the option review to obtain double line spacing
alpar@0
    25
%% \documentclass[authoryear,preprint,review,12pt]{elsarticle}
alpar@0
    26
alpar@0
    27
%% Use the options 1p,twocolumn; 3p; 3p,twocolumn; 5p; or 5p,twocolumn
alpar@0
    28
%% for a journal layout:
alpar@0
    29
%% \documentclass[final,1p,times]{elsarticle}
alpar@0
    30
%% \documentclass[final,1p,times,twocolumn]{elsarticle}
alpar@0
    31
%% \documentclass[final,3p,times]{elsarticle}
alpar@0
    32
%% \documentclass[final,3p,times,twocolumn]{elsarticle}
alpar@0
    33
%% \documentclass[final,5p,times]{elsarticle}
alpar@0
    34
%% \documentclass[final,5p,times,twocolumn]{elsarticle}
alpar@0
    35
alpar@0
    36
%% For including figures, graphicx.sty has been loaded in
alpar@0
    37
%% elsarticle.cls. If you prefer to use the old commands
alpar@0
    38
%% please give \usepackage{epsfig}
alpar@0
    39
alpar@0
    40
%% The amssymb package provides various useful mathematical symbols
alpar@0
    41
\usepackage{amssymb}
alpar@0
    42
%% The amsthm package provides extended theorem environments
alpar@0
    43
%% \usepackage{amsthm}
alpar@0
    44
alpar@0
    45
%% The lineno packages adds line numbers. Start line numbering with
alpar@0
    46
%% \begin{linenumbers}, end it with \end{linenumbers}. Or switch it on
alpar@0
    47
%% for the whole article with \linenumbers.
alpar@0
    48
%% \usepackage{lineno}
alpar@0
    49
alpar@2
    50
\usepackage{amsmath}
alpar@2
    51
%% \usepackage[pdftex]{graphicx}
alpar@2
    52
alpar@2
    53
\usepackage{pgfplots}
alpar@2
    54
\pgfplotsset{width=9cm}
alpar@2
    55
\pgfplotsset{compat=1.8}
alpar@2
    56
alpar@2
    57
\usepackage{caption}
alpar@2
    58
\usepackage{subcaption} 
alpar@2
    59
alpar@2
    60
\usepackage{algorithm}
alpar@2
    61
\usepackage{algpseudocode}
alpar@2
    62
\usepackage{tikz}
alpar@2
    63
alpar@2
    64
\usepackage{amsthm,amssymb}
alpar@2
    65
\renewcommand{\qedsymbol}{\rule{0.7em}{0.7em}}
alpar@2
    66
alpar@2
    67
\newtheorem{theorem}{Theorem}[subsection]
alpar@2
    68
\newtheorem{corollary}{Corollary}[theorem]
alpar@2
    69
\newtheorem{claim}[theorem]{Claim}
alpar@2
    70
alpar@2
    71
\newtheorem{definition}{Definition}[subsection]
alpar@2
    72
\newtheorem{notation}{Notation}[subsection]
alpar@2
    73
\newtheorem{example}{Example}[subsection]
alpar@2
    74
\usetikzlibrary{decorations.markings}
alpar@2
    75
\let\oldproofname=\proofname
alpar@2
    76
%% \renewcommand{\proofname}{\rm\bf{Proof:}}
alpar@2
    77
Madarasi@7
    78
\captionsetup{font=normalsize}
Madarasi@7
    79
alpar@1
    80
\journal{Discrete Applied Mathematics}
alpar@0
    81
alpar@0
    82
\begin{document}
alpar@0
    83
alpar@0
    84
\begin{frontmatter}
alpar@0
    85
alpar@0
    86
%% Title, authors and addresses
alpar@0
    87
alpar@0
    88
%% use the tnoteref command within \title for footnotes;
alpar@0
    89
%% use the tnotetext command for theassociated footnote;
alpar@0
    90
%% use the fnref command within \author or \address for footnotes;
alpar@0
    91
%% use the fntext command for theassociated footnote;
alpar@0
    92
%% use the corref command within \author for corresponding author footnotes;
alpar@0
    93
%% use the cortext command for theassociated footnote;
alpar@0
    94
%% use the ead command for the email address,
alpar@0
    95
%% and the form \ead[url] for the home page:
alpar@0
    96
%% \title{Title\tnoteref{label1}}
alpar@0
    97
%% \tnotetext[label1]{}
alpar@0
    98
%% \author{Name\corref{cor1}\fnref{label2}}
alpar@0
    99
%% \ead{email address}
alpar@0
   100
%% \ead[url]{home page}
alpar@0
   101
%% \fntext[label2]{}
alpar@0
   102
%% \cortext[cor1]{}
alpar@0
   103
%% \address{Address\fnref{label3}}
alpar@0
   104
%% \fntext[label3]{}
alpar@0
   105
alpar@24
   106
\title{VF2++ --- An Improved Subgraph Isomorphism Algorithm}
alpar@0
   107
alpar@0
   108
%% use optional labels to link authors explicitly to addresses:
alpar@0
   109
%% \author[label1,label2]{}
alpar@0
   110
%% \address[label1]{}
alpar@0
   111
%% \address[label2]{}
alpar@0
   112
alpar@24
   113
\author[egres,elte]{Alp{\'a}r J{\"u}ttner}
alpar@24
   114
\ead{alpar@cs.elte.hu}
alpar@24
   115
\author[elte]{P{\'e}ter Madarasi}
alpar@24
   116
\ead{madarasip@caesar.elte.hu}
alpar@24
   117
\address[egres]{MTA-ELTE Egerv{\'a}ry Research Group, Budapest, Hungary.}
alpar@24
   118
\address[elte]{Department of Operations Research, E{\"o}tv{\"o}s Lor{\'a}nd University, Budapest, Hungary.}
alpar@0
   119
alpar@0
   120
\begin{abstract}
alpar@0
   121
alpar@24
   122
  This paper presents a largely improved version of the VF2 algorithm
alpar@24
   123
  for the \emph{Subgraph Isomorphism Problem}. The improvements are
alpar@24
   124
  twofold. Firstly, it is based on a new approach for determining the
alpar@29
   125
  matching order of the nodes, and secondly, more efficient ---
alpar@29
   126
  nevertheless easier to compute --- cutting rules are proposed. They
alpar@29
   127
  together reduce the search space significantly.
alpar@1
   128
Madarasi@28
   129
  In addition to the usual \emph{Subgraph Isomorphism Problem}, the paper also
Madarasi@28
   130
  presents specialized algorithms for the \emph{Induced Subgraph
alpar@24
   131
    Isomorphism} and for the \emph{Graph Isomorphism Problems}.
alpar@1
   132
alpar@24
   133
  Finally, an extensive experimental evaluation is provided using a
Madarasi@28
   134
  wide range of inputs, including both real-life biological and
alpar@24
   135
  chemical datasets and standard randomly generated graph series. The
alpar@24
   136
  results show major and consistent running time improvements over the
alpar@24
   137
  other known methods.
alpar@24
   138
 
Madarasi@28
   139
  The C++ implementations of the algorithms are available open-source as 
Madarasi@28
   140
  part of the LEMON graph and network optimization library.
alpar@24
   141
  
alpar@0
   142
\end{abstract}
alpar@0
   143
alpar@0
   144
\begin{keyword}
alpar@24
   145
  Computational Biology, Subgraph Isomorphism Problem
alpar@0
   146
%% keywords here, in the form: keyword \sep keyword
alpar@0
   147
alpar@0
   148
%% PACS codes here, in the form: \PACS code \sep code
alpar@0
   149
alpar@0
   150
%% MSC codes here, in the form: \MSC code \sep code
alpar@0
   151
%% or \MSC[2008] code \sep code (2000 is the default)
alpar@0
   152
alpar@0
   153
\end{keyword}
alpar@0
   154
alpar@0
   155
\end{frontmatter}
alpar@0
   156
alpar@0
   157
%% \linenumbers
alpar@0
   158
alpar@0
   159
%% main text
alpar@2
   160
\section{Introduction}
alpar@2
   161
\label{sec:intro}
alpar@2
   162
alpar@3
   163
In the last decades, combinatorial structures, and especially graphs
alpar@3
   164
have been considered with ever increasing interest, and applied to the
alpar@29
   165
solution of several new and revised questions. The expressiveness,
alpar@29
   166
the simplicity and the deep theoretical background
alpar@29
   167
of graphs make it one of the most useful
alpar@29
   168
modeling tool and appears constantly in several seemingly independent
Madarasi@19
   169
fields, such as bioinformatics and chemistry.
alpar@2
   170
alpar@3
   171
Complex biological systems arise from the interaction and cooperation
alpar@29
   172
of plenty of molecular components. Getting acquainted with the
alpar@29
   173
structure of such systems at the molecular level is of primary
alpar@29
   174
importance, since protein-protein interaction, DNA-protein
alpar@29
   175
interaction, metabolic interaction, transcription factor binding,
alpar@29
   176
neuronal networks, and hormone singling networks can be understood
alpar@29
   177
this way.
alpar@2
   178
alpar@29
   179
Many chemical and biological structures can easily be modeled
Madarasi@19
   180
as graphs, for instance, a molecular structure can be
Madarasi@19
   181
considered as a graph, whose nodes correspond to atoms and whose
Madarasi@19
   182
edges to chemical bonds. The similarity and dissimilarity of
Madarasi@19
   183
objects corresponding to nodes are incorporated to the model
Madarasi@19
   184
by \emph{node labels}. Understanding such networks basically
Madarasi@26
   185
requires finding specific subgraphs, thus it calls for efficient
Madarasi@19
   186
graph matching algorithms.
alpar@2
   187
Madarasi@19
   188
Other real-world fields related to some
Madarasi@19
   189
variants of graph matching include pattern recognition
Madarasi@28
   190
and machine vision~\cite{HorstBunkeApplications}, symbol recognition~\cite{CordellaVentoSymbolRecognition}, and face identification~\cite{JianzhuangYongFaceIdentification}.  \\
alpar@2
   191
alpar@3
   192
Subgraph and induced subgraph matching problems are known to be
Madarasi@28
   193
NP-Complete~\cite{SubgraphNPC}, while the graph isomorphism problem is
alpar@3
   194
one of the few problems in NP neither known to be in P nor
Madarasi@28
   195
NP-Complete. Although polynomial-time isomorphism algorithms are known
alpar@3
   196
for various graph classes, like trees and planar
Madarasi@28
   197
graphs~\cite{PlanarGraphIso}, bounded valence
Madarasi@28
   198
graphs~\cite{BondedDegGraphIso}, interval graphs~\cite{IntervalGraphIso}
alpar@29
   199
or permutation graphs~\cite{PermGraphIso}. Furthermore, an FPT algorithm has also been presented for the colored hypergraph isomorphism problem in~\cite{ColoredHiperGraphIso}.
alpar@2
   200
alpar@29
   201
In the following, some algorithms which do not need any restrictions on the graphs are
alpar@29
   202
summarized. Even though,
alpar@29
   203
an overall polynomial behavior is not expectable from such an
alpar@29
   204
alternative, they may often have good practical performance, in fact,
alpar@29
   205
they might be the best choice in practice even on a graph class for which polynomial
Madarasi@19
   206
algorithm is known.
alpar@2
   207
alpar@3
   208
The first practically usable approach was due to
Madarasi@28
   209
\emph{Ullmann}~\cite{Ullmann}, which is a commonly used algorithm based on depth-first
Madarasi@28
   210
search with a complex heuristic for reducing the
alpar@3
   211
number of visited states. A major problem is its $\Theta(n^3)$ space
alpar@3
   212
complexity, which makes it impractical in the case of big sparse
alpar@3
   213
graphs.
alpar@2
   214
Madarasi@28
   215
In a recent paper, Ullmann~\cite{UllmannBit} presents an
alpar@3
   216
improved version of this algorithm based on a bit-vector solution for
alpar@3
   217
the binary Constraint Satisfaction Problem.
alpar@2
   218
Madarasi@28
   219
The \emph{Nauty} algorithm~\cite{Nauty} transforms the two graphs to
alpar@29
   220
a canonical form before starting to look for an isomorphism. It has
alpar@3
   221
been considered as one of the fastest graph isomorphism algorithms,
alpar@3
   222
although graph categories were shown in which it takes exponentially
alpar@3
   223
many steps. This algorithm handles only the graph isomorphism problem.
alpar@2
   224
Madarasi@28
   225
The \emph{LAD} algorithm~\cite{Lad} uses a depth-first search
alpar@3
   226
strategy and formulates the matching as a Constraint Satisfaction
alpar@3
   227
Problem to prune the search tree. The constraints are that the mapping
alpar@29
   228
has to be invective and edge-preserving, hence it is possible to
alpar@3
   229
handle new matching types as well.
alpar@2
   230
Madarasi@28
   231
The \emph{RI} algorithm~\cite{RI} and its variations are based on a
alpar@3
   232
state space representation. After reordering the nodes of the graphs,
alpar@3
   233
it uses some fast executable heuristic checks without using any
alpar@3
   234
complex pruning rules. It seems to run really efficiently on graphs
alpar@3
   235
coming from biology, and won the International Contest on Pattern
Madarasi@28
   236
Search in Biological Databases~\cite{Content}.
alpar@2
   237
Madarasi@28
   238
Currently, the most commonly used algorithm is the
alpar@29
   239
\emph{VF2}~\cite{VF2}, an improved version of \emph{VF}~\cite{VF}, which was
alpar@3
   240
designed for solving pattern matching and computer vision problems,
alpar@3
   241
and has been one of the best overall algorithms for more than a
Madarasi@28
   242
decade. Although, it is not as fast as some of the new specialized algorithms, it is still widely used due to its simplicity and space efficiency. VF2 uses
alpar@29
   243
a state space representation and checks specific conditions in each state
alpar@3
   244
to prune the search tree.
alpar@2
   245
Madarasi@28
   246
Meanwhile, another variant called \emph{VF2~Plus}~\cite{VF2Plus} has
alpar@3
   247
been published. It is considered to be as efficient as the RI
alpar@29
   248
algorithm and has a strictly better behavior on large graphs.  The
alpar@29
   249
main idea of VF2~Plus is to precompute a heuristic node order of the graph to be embedded, on which VF2 works more efficiently.
alpar@2
   250
Madarasi@19
   251
This paper introduces \emph{VF2++}, a new further improved algorithm
alpar@29
   252
for the graph and (induced) subgraph isomorphism problems. It is based on
Madarasi@19
   253
efficient cutting rules and determines a node order in which VF2 runs
Madarasi@19
   254
significantly faster on practical inputs.
Madarasi@19
   255
alpar@25
   256
The rest of the paper is structured as
alpar@25
   257
follows. Section~\ref{sec:ProbStat} defines the exact problems to be
alpar@25
   258
solved, Section~\ref{sec:VF2Alg} provides a description of VF2. Based
alpar@25
   259
on that, Section~\ref{sec:VF2ppAlg} introduces VF2++. Some technical
alpar@25
   260
details necessary for an efficient implementation are discussed in
alpar@29
   261
Section~\ref{sec:VF2ppImpl}. Finally, Section~\ref{sec:ExpRes}
Madarasi@28
   262
provides a detailed experimental evaluation of VF2++ and its comparison
alpar@27
   263
to the state-of-the-art algorithm.
Madarasi@26
   264
alpar@25
   265
It must also be mentioned that the C++ implementations of the
alpar@25
   266
algorithms have been made available for evaluation and use under an
alpar@29
   267
open-source license as a part of
alpar@29
   268
LEMON~\cite{o11:_lemon_open_sourc_c_graph_templ_librar} graph library.
Madarasi@22
   269
Madarasi@22
   270
\section{Problem Statement}\label{sec:ProbStat}
Madarasi@19
   271
This section provides a formal description of the problems to be
alpar@3
   272
solved.
alpar@2
   273
\subsection{Definitions}
alpar@2
   274
Madarasi@19
   275
Throughout the paper $G_{1}=(V_{1}, E_{1})$ and
Madarasi@19
   276
$G_{2}=(V_{2}, E_{2})$ denote two undirected graphs.
Madarasi@19
   277
Madarasi@19
   278
\begin{definition}
Madarasi@19
   279
$\mathcal{L}: (V_{1}\cup V_{2}) \longrightarrow K$ is a \textbf{node
Madarasi@19
   280
    label function}, where K is an arbitrary set. The elements in K
Madarasi@19
   281
  are the \textbf{node labels}. Two nodes, u and v are said to be
Madarasi@19
   282
  \textbf{equivalent} if $\mathcal{L}(u)=\mathcal{L}(v)$.
Madarasi@19
   283
\end{definition}
Madarasi@19
   284
alpar@29
   285
For the sake of simplicity, the graph, subgraph and induced subgraph
alpar@29
   286
isomorphisms are defined in a more general way.
Madarasi@19
   287
alpar@2
   288
\begin{definition}\label{sec:ismorphic}
Madarasi@19
   289
$G_{1}$ and $G_{2}$ are \textbf{isomorphic} (by the node label $\mathcal{L}$) if $\exists \mathfrak{m}:
Madarasi@19
   290
  V_{1} \longrightarrow V_{2}$ bijection, for which the
alpar@3
   291
  following is true:
alpar@2
   292
\begin{center}
Madarasi@20
   293
$\forall u\in{V_{1}} : \mathcal{L}(u)=\mathcal{L}(\mathfrak{m}(u))$ and\\
Madarasi@19
   294
$\forall u,v\in{V_{1}} : (u,v)\in{E_{1}} \Leftrightarrow (\mathfrak{m}(u),\mathfrak{m}(v))\in{E_{2}}$
alpar@2
   295
\end{center}
alpar@2
   296
\end{definition}
Madarasi@19
   297
alpar@2
   298
\begin{definition}
Madarasi@19
   299
$G_{1}$ is a \textbf{subgraph} of $G_{2}$ (by the node label $\mathcal{L}$) if $\exists \mathfrak{m}:
Madarasi@19
   300
  V_{1}\longrightarrow V_{2}$ injection, for which the
alpar@3
   301
  following is true:
alpar@2
   302
\begin{center}
Madarasi@20
   303
$\forall u\in{V_{1}} : \mathcal{L}(u)=\mathcal{L}(\mathfrak{m}(u))$ and\\
Madarasi@19
   304
$\forall u,v \in{V_{1}} : (u,v)\in{E_{1}} \Rightarrow (\mathfrak{m}(u),\mathfrak{m}(v))\in E_{2}$
alpar@2
   305
\end{center}
alpar@2
   306
\end{definition}
alpar@2
   307
alpar@2
   308
\begin{definition} 
Madarasi@19
   309
$G_{1}$ is an \textbf{induced subgraph} of $G_{2}$ (by the node label $\mathcal{L}$) if $\exists
Madarasi@19
   310
  \mathfrak{m}: V_{1}\longrightarrow V_{2}$ injection, for which the
alpar@3
   311
  following is true:
alpar@2
   312
\begin{center}
Madarasi@19
   313
$\forall u\in{V_{1}} : \mathcal{L}(u)=\mathcal{L}(\mathfrak{m}(u))$ and
Madarasi@19
   314
Madarasi@19
   315
$\forall u,v \in{V_{1}} : (u,v)\in{E_{1}} \Leftrightarrow
Madarasi@19
   316
  (\mathfrak{m}(u),\mathfrak{m}(v))\in E_{2}$
alpar@2
   317
\end{center}
alpar@2
   318
\end{definition}
alpar@2
   319
alpar@2
   320
alpar@2
   321
\subsection{Common problems}\label{sec:CommProb}
alpar@2
   322
Madarasi@28
   323
The focus of this paper is on the following problems appearing in many applications.
alpar@2
   324
Madarasi@28
   325
The \textbf{subgraph isomorphism problem} is the following: is
Madarasi@19
   326
$G_{1}$ isomorphic to any subgraph of $G_{2}$ by a given node
alpar@3
   327
label?
alpar@2
   328
Madarasi@28
   329
The \textbf{induced subgraph isomorphism problem} asks the same about the
alpar@3
   330
existence of an induced subgraph.
alpar@2
   331
alpar@3
   332
The \textbf{graph isomorphism problem} can be defined as induced
alpar@3
   333
subgraph matching problem where the sizes of the two graphs are equal.
alpar@2
   334
alpar@29
   335
In addition, one may either want to find a \textbf{single} embedding or \textbf{enumerate} all of them.
alpar@2
   336
Madarasi@22
   337
\section{The VF2 Algorithm}\label{sec:VF2Alg}
Madarasi@28
   338
This algorithm is the basis of both the VF2++ and the VF2~Plus.  VF2
Madarasi@28
   339
is able to handle all the variations mentioned in Section~\ref{sec:CommProb}.  Although it can also handle directed graphs,
alpar@29
   340
for the sake of simplicity, only the undirected case is
alpar@3
   341
discussed.
alpar@2
   342
alpar@2
   343
alpar@2
   344
\subsection{Common notations}
Madarasi@19
   345
\indent Assume $G_{1}$ is searched in $G_{2}$.  The following
alpar@29
   346
definitions and notations is used throughout this paper.
alpar@2
   347
\begin{definition}
Madarasi@19
   348
An injection $\mathfrak{m} : D \longrightarrow V_2$ is called (partial) \textbf{mapping}, where $D\subseteq V_1$.
Madarasi@19
   349
\end{definition}
Madarasi@19
   350
Madarasi@19
   351
\begin{notation}
Madarasi@19
   352
$\mathfrak{D}(f)$ and $\mathfrak{R}(f)$ denote the domain and the range of a function $f$, respectively.
Madarasi@19
   353
\end{notation}
Madarasi@19
   354
Madarasi@19
   355
\begin{definition}
Madarasi@19
   356
Mapping $\mathfrak{m}$ \textbf{covers} a node $u\in V_1\cup V_2$ if $u\in \mathfrak{D}(\mathfrak{m})\cup \mathfrak{R}(\mathfrak{m})$.
alpar@2
   357
\end{definition}
alpar@2
   358
alpar@2
   359
\begin{definition}
Madarasi@19
   360
A mapping $\mathfrak{m}$ is $\mathbf{whole\ mapping}$ if $\mathfrak{m}$ covers all the
Madarasi@19
   361
nodes of $V_{1}$, i.e. $\mathfrak{D}(\mathfrak{m})=V_1$.
alpar@2
   362
\end{definition}
alpar@2
   363
alpar@2
   364
\begin{definition}
Madarasi@28
   365
Let \textbf{extend}$(\mathfrak{m},(u,v))$ denote the function $f : \mathfrak{D}(\mathfrak{m})\cup\{u\}\longrightarrow\mathfrak{R}(\mathfrak{m})\cup\{v\}$, for which $\forall w\in \mathfrak{D}(\mathfrak{m}) : f(w)=\mathfrak{m}(w)$ and $f(u)=v$ holds, where $u\in V_1\setminus\mathfrak{D}(\mathfrak{m})$ and $v\in V_2\setminus\mathfrak{R}(\mathfrak{m})$; otherwise $extend(\mathfrak{m},(u,v))$ is undefined.
alpar@2
   366
\end{definition}
alpar@2
   367
alpar@2
   368
\begin{notation}
Madarasi@19
   369
Throughout the paper, $\mathbf{PT}$ denotes a generic problem type
Madarasi@28
   370
which can be substituted by any of the $\mathbf{SUB}$, $\mathbf{IND}$
alpar@29
   371
and $\mathbf{ISO}$ problems, which stand for the problems mentioned in Section~\ref{sec:CommProb} respectively.
alpar@2
   372
\end{notation}
alpar@2
   373
alpar@2
   374
\begin{definition}
Madarasi@28
   375
Let $\mathfrak{m}$ be a mapping. The \textbf{consistency function for } $\mathbf{PT}$ is a logical function, for which $\mathbf{Cons_{PT}}(\mathfrak{m})$ is true if and only if $\mathfrak{m}$ satisfies the requirements of $\mathbf{PT}$ considering the subgraphs of $G_{1}$ and $G_{2}$ induced by $\mathfrak{D}(\mathfrak{m})$ and $\mathfrak{R}(\mathfrak{m})$, respectively.
Madarasi@28
   376
Madarasi@28
   377
%$\mathbf{Cons_{PT}}(\mathfrak{m})$ is true if there exists a whole mapping $w$ satisfying the requirements of $PT$, for which $\mathfrak{m}$ is exactly $w$ restricted to $\mathfrak{D}(\mathfrak{m})$.
alpar@2
   378
\end{definition}
alpar@2
   379
alpar@2
   380
\begin{definition} 
Madarasi@19
   381
Let $\mathfrak{m}$ be a mapping. A logical function $\mathbf{Cut_{PT}}$ is a
Madarasi@28
   382
\textbf{cutting function for } $\mathbf{PT}$ if the following
Madarasi@19
   383
holds. $\mathbf{Cut_{PT}(\mathfrak{m})}$ is false if there exists a sequence of extend operations, which results in a whole mapping satisfying the requirements of $PT$.
alpar@2
   384
\end{definition}
alpar@2
   385
alpar@2
   386
\begin{definition}
Madarasi@28
   387
A mapping $\mathfrak{m}$ is said to be \textbf{consistent mapping by} $\mathbf{PT}$ if
Madarasi@19
   388
  $Cons_{PT}(\mathfrak{m})$ is true.
alpar@2
   389
\end{definition}
alpar@2
   390
alpar@2
   391
$Cons_{PT}$ and $Cut_{PT}$ will often be used in the following form.
alpar@2
   392
\begin{notation}
Madarasi@19
   393
Let $\mathbf{Cons_{PT}(p, \mathfrak{m})}:=Cons_{PT}(extend(\mathfrak{m},p))$, and
Madarasi@19
   394
$\mathbf{Cut_{PT}(p, \mathfrak{m})}:=Cut_{PT}(extend(\mathfrak{m},p))$, where
Madarasi@19
   395
$p\in{V_{1}\backslash\mathfrak{D}(\mathfrak{m}) \!\times\!V_{2}\backslash\mathfrak{R}(\mathfrak{m})}$.
alpar@2
   396
\end{notation}
alpar@2
   397
alpar@3
   398
$Cons_{PT}$ will be used to check the consistency of the already
alpar@3
   399
covered nodes, while $Cut_{PT}$ is for looking ahead to recognize if
alpar@3
   400
no whole consistent mapping can contain the current mapping.
alpar@2
   401
alpar@2
   402
\subsection{Overview of the algorithm}
alpar@2
   403
Madarasi@28
   404
VF2 begins with an empty mapping and gradually extends it with respect to the consistency and cutting functions until a whole mapping is reached.
Madarasi@28
   405
Madarasi@28
   406
Algorithm~\ref{alg:VF2Pseu} is a high-level description of
Madarasi@28
   407
the VF2 algorithm. Each state of the matching process can
Madarasi@19
   408
be associated with a mapping $\mathfrak{m}$. The initial state
Madarasi@19
   409
is associated with a mapping $\mathfrak{m}$, for which
Madarasi@19
   410
$\mathfrak{D}(\mathfrak{m})=\emptyset$, i.e. it starts with an empty mapping.
alpar@2
   411
alpar@2
   412
alpar@2
   413
\begin{algorithm}
Madarasi@13
   414
\algtext*{EndIf}%ne nyomtasson end if-et
Madarasi@13
   415
\algtext*{EndFor}%ne
Madarasi@13
   416
\algtext*{EndProcedure}%ne nyomtasson ..
alpar@2
   417
\caption{\hspace{0.5cm}$A\ high\ level\ description\ of\ VF2$}\label{alg:VF2Pseu}
alpar@2
   418
\begin{algorithmic}[1]
alpar@2
   419
Madarasi@19
   420
\Procedure{VF2}{Mapping $\mathfrak{m}$, ProblemType $PT$}
Madarasi@19
   421
  \If{$\mathfrak{m}$ covers
Madarasi@19
   422
    $V_{1}$} \State Output($\mathfrak{m}$)
Madarasi@19
   423
  \Else
Madarasi@28
   424
  \State Compute the set $P_\mathfrak{m}$ of the candidate pairs for extending $\mathfrak{m}$ \ForAll{$p\in{P_\mathfrak{m}}$} \If{Cons$_{PT}$($p,\mathfrak{m}$) $\wedge$
Madarasi@19
   425
    $\neg$Cut$_{PT}$($p,\mathfrak{m}$)}
Madarasi@19
   426
    \State \textbf{call}
Madarasi@19
   427
  VF2($extend(\mathfrak{m},p)$, $PT$) \EndIf \EndFor \EndIf \EndProcedure
alpar@2
   428
\end{algorithmic}
alpar@2
   429
\end{algorithm}
alpar@2
   430
alpar@2
   431
Madarasi@19
   432
For the current mapping $\mathfrak{m}$, the algorithm computes $P_\mathfrak{m}$, the set of
Madarasi@28
   433
candidate node pairs for extending the current mapping $\mathfrak{m}$.
alpar@2
   434
Madarasi@19
   435
For each pair $p$ in $P_\mathfrak{m}$, $Cons_{PT}(p,\mathfrak{m})$ and
Madarasi@19
   436
$Cut_{PT}(p,\mathfrak{m})$ are evaluated. If the former is true and
Madarasi@19
   437
the latter is false, the whole process is recursively applied to
Madarasi@19
   438
$extend(\mathfrak{m},p)$. Otherwise, $extend(\mathfrak{m},p)$ is not consistent by $PT$, or it
Madarasi@19
   439
can be proved that $\mathfrak{m}$ can not be extended to a whole mapping.
alpar@2
   440
alpar@29
   441
The correctness of the procedure follows from the claim below.
alpar@29
   442
Madarasi@28
   443
\begin{claim}\label{claim:consMapps}
alpar@3
   444
Through consistent mappings, only consistent whole mappings can be
Madarasi@19
   445
reached, and all the consistent whole mappings are reachable through
alpar@3
   446
consistent mappings.
alpar@2
   447
\end{claim}
alpar@2
   448
Madarasi@19
   449
Note that a mapping may be reached in exponentially many different ways, since the
Madarasi@19
   450
order of extensions does not influence the nascent mapping.
alpar@2
   451
Madarasi@28
   452
However, one may make the following observations.
Madarasi@28
   453
Madarasi@28
   454
%\begin{claim}
Madarasi@28
   455
%\label{claim:claimTotOrd}
Madarasi@28
   456
%Let $\prec$ be an arbitrary total ordering relation on $V_{1}$.  If
Madarasi@28
   457
%the algorithm ignores each $p=(u,v) \in P_\mathfrak{m}$, for which
Madarasi@28
   458
%\begin{center}
Madarasi@28
   459
%$\exists (\tilde{u},\tilde{v})\in P_\mathfrak{m}: \tilde{u} \prec u$,
Madarasi@28
   460
%\end{center}
Madarasi@28
   461
%then no mapping can be reached more than once, and each whole mapping %remains reachable.
Madarasi@28
   462
%\end{claim}
Madarasi@28
   463
Madarasi@28
   464
\begin{definition}
Madarasi@28
   465
A total order $(u_{\sigma(1)},u_{\sigma(2)},..,u_{\sigma(|V_{1}|)})$ of
Madarasi@28
   466
$V_{1}$ is \textbf{matching order} if VF2 can cover $u_{\sigma(d)}$ on the $d$-th level for all $d\in\{1,..,|V_{1}|\}$.
Madarasi@28
   467
\end{definition}
alpar@2
   468
alpar@2
   469
\begin{claim}
alpar@2
   470
\label{claim:claimTotOrd}
Madarasi@28
   471
If VF2 is prescribed to cover the nodes of $G_1$ according to a matching order, then no mapping can be reached more than once and each whole mapping remains reachable.
alpar@2
   472
\end{claim}
alpar@2
   473
Madarasi@28
   474
Note that the cornerstone of the improvements to VF2 is to choose a proper
Madarasi@28
   475
matching order.
alpar@2
   476
Madarasi@19
   477
\subsection{The candidate set}
alpar@2
   478
\label{candidateComputingVF2}
Madarasi@19
   479
Let $P_\mathfrak{m}$ be the set of the candidate pairs for inclusion in $\mathfrak{m}$.
alpar@2
   480
alpar@2
   481
\begin{notation}
Madarasi@19
   482
Let $\mathbf{T_{1}(\mathfrak{m})}:=\{u \in V_{1}\backslash\mathfrak{D}(\mathfrak{m}) : \exists \tilde{u}\in{\mathfrak{D}(\mathfrak{m}): (u,\tilde{u})\in E_{1}}\}$, and
Madarasi@19
   483
 $\mathbf{T_{2}(\mathfrak{m})} := \{v \in V_{2}\backslash\mathfrak{R}(\mathfrak{m}) : \exists\tilde{v}\in{\mathfrak{R}(\mathfrak{m}):(v,\tilde{v})\in E_{2}}\}$.
alpar@2
   484
\end{notation}
alpar@2
   485
Madarasi@28
   486
The set $P_\mathfrak{m}$ contains the pairs of uncovered neighbours of covered
Madarasi@17
   487
nodes, and if there is not such a node pair, all the pairs containing
alpar@3
   488
two uncovered nodes are added. Formally, let
alpar@2
   489
\[
Madarasi@19
   490
 P_\mathfrak{m}\!=\!
alpar@2
   491
  \begin{cases} 
Madarasi@19
   492
   T_{1}(\mathfrak{m})\times T_{2}(\mathfrak{m})&\hspace{-0.15cm}\text{if }
Madarasi@19
   493
   T_{1}(\mathfrak{m})\!\neq\!\emptyset\ \text{and }T_{2}(\mathfrak{m})\!\neq
Madarasi@19
   494
   \emptyset,\\ (V_{1}\!\setminus\!\mathfrak{D}(\mathfrak{m}))\!\times\!(V_{2}\!\setminus\!\mathfrak{R}(\mathfrak{m}))
Madarasi@19
   495
   &\hspace{-0.15cm}\text{otherwise}.
alpar@2
   496
  \end{cases}
alpar@2
   497
\]
alpar@2
   498
alpar@2
   499
\subsection{Consistency}
Madarasi@28
   500
Let $p=(u,v)\in V_{1}\times V_{2}$, and suppose $\mathfrak{m}$ is a consistent mapping by
Madarasi@19
   501
$PT$. $Cons_{PT}(p,\mathfrak{m})$ checks whether
Madarasi@28
   502
adding pair $p$ into $\mathfrak{m}$ leads to a consistent mapping by $PT$.
Madarasi@15
   503
Madarasi@28
   504
For example, the consistency function of the induced subgraph isomorphism problem is the following.
alpar@2
   505
\begin{notation}
Madarasi@19
   506
Let $\mathbf{\Gamma_{1} (u)}:=\{\tilde{u}\in V_{1} :
Madarasi@19
   507
(u,\tilde{u})\in E_{1}\}$, and $\mathbf{\Gamma_{2}
Madarasi@28
   508
  (v)}:=\{\tilde{v}\in V_{2} : (v,\tilde{v})\in E_{2}\}$, where $u\in V_{1}$ and $v\in V_{2}$. That is, $\mathbf{\Gamma_{i} (w)}$ denotes the set of neighbours of node $w$ in $G_i$ $(i=1,2)$.
alpar@2
   509
\end{notation}
alpar@2
   510
alpar@2
   511
\begin{claim}
Madarasi@28
   512
$extend(\mathfrak{m},(u,v))$ is a consistent mapping by $IND$ if and only if $\mathfrak{m}$ is consistent and $(\forall \tilde{u}\in \mathfrak{D}(\mathfrak{m}): (u,\tilde{u})\in E_{1}
Madarasi@28
   513
\Leftrightarrow (v,\mathfrak{m}(\tilde{u}))\in E_{2})$. 
Madarasi@28
   514
\end{claim}
Madarasi@28
   515
Madarasi@28
   516
The following formulation gives an efficient way of calculating $Cons_{IND}$.
Madarasi@28
   517
Madarasi@28
   518
\begin{claim}
Madarasi@28
   519
$Cons_{IND}((u,v),\mathfrak{m}):=Cons_{IND}(\mathfrak{m})\wedge\mathcal{L}(u)\!\!=\!\!\mathcal{L}(v)\wedge(\forall \tilde{v}\in \Gamma_{2}(v)\cap\mathfrak{R}(\mathfrak{m}):(u,\mathfrak{m}^{-1}(\tilde{v}))\in E_{1})\wedge
Madarasi@19
   520
  (\forall \tilde{u}\in \Gamma_{1}(u)
Madarasi@28
   521
  \cap \mathfrak{D}(\mathfrak{m}):(v,\mathfrak{m}(\tilde{u}))\in E_{2})$ is the consistency function for $IND$.
alpar@2
   522
\end{claim}
alpar@2
   523
alpar@2
   524
\subsection{Cutting rules}
Madarasi@19
   525
$Cut_{PT}(p,\mathfrak{m})$ is defined by a collection of efficiently
Madarasi@19
   526
verifiable conditions. The requirement is that $Cut_{PT}(p,\mathfrak{m})$ can
Madarasi@19
   527
be true only if it is impossible to extend $extend(\mathfrak{m},p)$ to a
alpar@3
   528
whole mapping.
Madarasi@15
   529
Madarasi@28
   530
As an example, a cutting function of induced subgraph isomorphism problem is presented.
alpar@2
   531
\begin{notation}
Madarasi@19
   532
Let $\mathbf{\tilde{T}_{1}}(\mathfrak{m}):=(V_{1}\backslash
Madarasi@19
   533
\mathfrak{D}(\mathfrak{m}))\backslash T_{1}(\mathfrak{m})$, and
Madarasi@19
   534
\\ $\mathbf{\tilde{T}_{2}}(\mathfrak{m}):=(V_{2}\backslash
Madarasi@19
   535
\mathfrak{R}(\mathfrak{m}))\backslash T_{2}(\mathfrak{m})$.
alpar@2
   536
\end{notation}
Madarasi@15
   537
alpar@2
   538
\begin{claim}
Madarasi@19
   539
$Cut_{IND}((u,v),\mathfrak{m}):= |\Gamma_{2} (v)\ \cap\ T_{2}(\mathfrak{m})| <
Madarasi@19
   540
  |\Gamma_{1} (u)\ \cap\ T_{1}(\mathfrak{m})| \vee |\Gamma_{2}(v)\cap
Madarasi@19
   541
  \tilde{T}_{2}(\mathfrak{m})| < |\Gamma_{1}(u)\cap
Madarasi@28
   542
  \tilde{T}_{1}(\mathfrak{m})|$ is a cutting function for $IND$.
alpar@2
   543
\end{claim}
alpar@2
   544
Madarasi@22
   545
\section{The VF2++ Algorithm}\label{sec:VF2ppAlg}
Madarasi@28
   546
Although any matching order makes the search space of VF2 a
alpar@3
   547
tree, its choice turns out to dramatically influence the number of
alpar@3
   548
visited states. The goal is to determine an efficient one as quickly
alpar@3
   549
as possible.
alpar@2
   550
Madarasi@28
   551
The main reason for the superiority of VF2++ over VF2 is twofold. Firstly,
alpar@29
   552
taking into account the structure and the node labeling of the graph,
Madarasi@28
   553
VF2++ determines a matching order in which most of the unfruitful
alpar@3
   554
branches of the search space can be pruned immediately. Secondly,
alpar@3
   555
introducing more efficient --- nevertheless still easier to compute
alpar@3
   556
--- cutting rules reduces the chance of going astray even further.
alpar@2
   557
Madarasi@28
   558
In addition to the usual subgraph isomorphism problem, specialized versions
alpar@29
   559
for induced subgraph and graph isomorphism problems have also been
Madarasi@22
   560
designed.
alpar@2
   561
Madarasi@28
   562
Note that a weaker version of the cutting rules of VF2++ and an efficient
Madarasi@28
   563
candidate set calculation method were described in~\cite{VF2Plus}.
alpar@2
   564
alpar@3
   565
It should be noted that all the methods described in this section are
Madarasi@22
   566
extendable to handle directed graphs and edge labels as well.
alpar@3
   567
The basic ideas and the detailed description of VF2++ are provided in
Madarasi@22
   568
the following.\newline
alpar@2
   569
Madarasi@19
   570
The goal is to find a matching order in which the algorithm is able to
Madarasi@19
   571
recognize inconsistency or prune the infeasible branches on the
Madarasi@19
   572
highest levels and goes deep only if it is needed.
Madarasi@19
   573
Madarasi@19
   574
\begin{notation}
Madarasi@28
   575
Let $\mathbf{Conn_{H}(u)}:=|\Gamma_{1}(u)\cap H|$, that is the
Madarasi@19
   576
number of neighbours of u which are in H, where $u\in V_{1} $ and
Madarasi@19
   577
$H\subseteq V_{1}$.
Madarasi@19
   578
\end{notation}
Madarasi@19
   579
Madarasi@19
   580
The principal question is the following. Suppose a mapping $\mathfrak{m}$ is
Madarasi@19
   581
given. For which node of $T_{1}(\mathfrak{m})$ is the hardest to find a
alpar@29
   582
consistent pair in $G_{2}$? The more covered neighbors a node in
Madarasi@19
   583
$T_{1}(\mathfrak{m})$ has --- i.e. the largest $Conn_{\mathfrak{D}(\mathfrak{m})}$ it has
alpar@29
   584
---, the more rare-to-satisfy consistency constraints for its pair
Madarasi@19
   585
are given.
Madarasi@19
   586
Madarasi@28
   587
Most of the graphs of biological and chemical structures are sparse, thus several nodes in
Madarasi@19
   588
$T_{1}(\mathfrak{m})$ may have the same $Conn_{\mathfrak{D}(\mathfrak{m})}$, which makes
Madarasi@19
   589
reasonable to define a secondary and a tertiary order between them.
Madarasi@19
   590
The observation above proves itself to be as determining, that the
alpar@29
   591
secondary ordering prefers nodes with the most uncovered neighbors
Madarasi@19
   592
among which have the same $Conn_{\mathfrak{D}(\mathfrak{m})}$ to increase
Madarasi@28
   593
$Conn_{\mathfrak{D}(\mathfrak{m})}$ of uncovered nodes as much, as possible.  The tertiary ordering prefers nodes having the rarest uncovered labels in $G_2$.
Madarasi@19
   594
Madarasi@28
   595
Note that the secondary ordering is the same as ordering by degrees,
Madarasi@19
   596
which is a static data in front of the above used.
Madarasi@19
   597
Madarasi@19
   598
These rules can easily result in a matching order which contains the
Madarasi@19
   599
nodes of a long path successively, whose nodes may have low $Conn$ and
alpar@29
   600
is easily to match into $G_{2}$. To try to avoid that, a
alpar@29
   601
Breadth-First-Search order is used, and on each of its levels, the
alpar@29
   602
ordering procedure described above is applied.
Madarasi@19
   603
\newline
Madarasi@19
   604
alpar@29
   605
In the following, examples are shown, demonstrating that VF2 may be
alpar@29
   606
slow are, even though a matching can be found easily by using a proper
Madarasi@19
   607
matching order.
Madarasi@19
   608
Madarasi@19
   609
\begin{example}
Madarasi@19
   610
Suppose $G_{1}$ can be mapped into $G_{2}$ in many ways
Madarasi@19
   611
without node labels. Let $u\in V_{1}$ and $v\in V_{2}$.
Madarasi@19
   612
\newline
Madarasi@19
   613
$\mathcal{L}(u):=black$
Madarasi@19
   614
\newline
Madarasi@19
   615
$\mathcal{L}(v):=black$
Madarasi@19
   616
\newline
Madarasi@22
   617
$\mathcal{L}(\tilde{u}):=red \ \forall \tilde{u}\in V_{1}\backslash
Madarasi@22
   618
\{u\}$
Madarasi@19
   619
\newline
Madarasi@22
   620
$\mathcal{L}(\tilde{v}):=red \ \forall \tilde{v}\in V_{2}\backslash
Madarasi@22
   621
\{v\}$
Madarasi@19
   622
Madarasi@19
   623
Now, any mapping by $\mathcal{L}$ must contain $(u,v)$, since
Madarasi@19
   624
$u$ is black and no node in $V_{2}$ has a black label except
Madarasi@19
   625
$v$. If unfortunately $u$ were the last node which will get covered,
Madarasi@19
   626
VF2 would check only in the last steps, whether $u$ can be matched to
Madarasi@19
   627
$v$.
Madarasi@28
   628
alpar@29
   629
However, had $u$ been the first matched node, $u$ would have been
alpar@29
   630
matched immediately to $v$, so all the mappings would have been
Madarasi@19
   631
precluded in which node labels can not correspond.
Madarasi@19
   632
\end{example}
Madarasi@19
   633
Madarasi@19
   634
\begin{example}
Madarasi@28
   635
Suppose there is no node label given, and $G_{1}$ is a small graph that can not be mapped into $G_{2}$ and $u\in V_{1}$.
Madarasi@19
   636
\newline
Madarasi@19
   637
Let $G'_{1}:=(V_{1}\cup
Madarasi@19
   638
\{u'_{1},u'_{2},..,u'_{k}\},E_{1}\cup
Madarasi@19
   639
\{(u,u'_{1}),(u'_{1},u'_{2}),..,(u'_{k-1},u'_{k})\})$, that is,
Madarasi@19
   640
$G'_{1}$ is $G_{1}\cup \{ a\ k$ long path, which is disjoint
Madarasi@19
   641
from $G_{1}$ and one of its starting points is connected to $u\in
Madarasi@19
   642
V_{1}\}$.
Madarasi@28
   643
alpar@29
   644
If, unfortunately, the nodes of the path were the first $k$ nodes in the
alpar@29
   645
matching order, the algorithm would iterate through all the possible $k$
Madarasi@19
   646
long paths in $G_{2}$, and it would recognize that no path can be
Madarasi@19
   647
extended to $G'_{1}$.
Madarasi@19
   648
\newline
Madarasi@19
   649
However, had it started by the matching of $G_{1}$, it would not
Madarasi@19
   650
have matched any nodes of the path.
Madarasi@19
   651
\end{example}
Madarasi@19
   652
Madarasi@19
   653
These examples may look artificial, but the same problems also appear
Madarasi@19
   654
in real-world instances, even though in a less obvious way.
Madarasi@19
   655
Madarasi@28
   656
%\subsection{Preparations}
Madarasi@28
   657
%\begin{claim}
Madarasi@28
   658
%\label{claim:claimCoverFromLeft}
Madarasi@28
   659
%The total ordering relation uniquely determines a node order, in which
Madarasi@28
   660
%the nodes of $V_{1}$ will be covered by VF2. From the point of
Madarasi@28
   661
%view of the matching procedure, this means, that always the same node
Madarasi@28
   662
%of $G_{1}$ will be covered on the $d$-th level.
Madarasi@28
   663
%\end{claim}
alpar@2
   664
Madarasi@28
   665
%\begin{definition}
Madarasi@28
   666
%An order $(u_{\sigma(1)},u_{\sigma(2)},..,u_{\sigma(|V_{1}|)})$ of
Madarasi@28
   667
%$V_{1}$ is \textbf{matching order} if there exists $\prec$ total
Madarasi@28
   668
%ordering relation, s.t. the VF2 with $\prec$ on the d-th level finds
Madarasi@28
   669
%pair for $u_{\sigma(d)}$ for all $d\in\{1,..,|V_{1}|\}$.
Madarasi@28
   670
%\end{definition}
alpar@2
   671
Madarasi@28
   672
%\begin{claim}\label{claim:MOclaim}
Madarasi@28
   673
%A total ordering is matching order iff the nodes of every component
Madarasi@28
   674
%form an interval in the node sequence, and every node connects to a
Madarasi@28
   675
%previous node in its component except the first node of each component.
Madarasi@28
   676
%\end{claim}
alpar@2
   677
Madarasi@28
   678
%In summary, a total ordering always uniquely determines a matching
Madarasi@28
   679
%order, and every matching order can be determined by a total ordering,
Madarasi@28
   680
%however, more than one different total orderings may determine the
Madarasi@28
   681
%same matching order.
alpar@2
   682
Madarasi@28
   683
\subsection{Matching order}
alpar@2
   684
\begin{notation}
Madarasi@19
   685
Let \textbf{F$_\mathcal{M}$(l)}$:=|\{v\in V_{2} :
Madarasi@28
   686
l=\mathcal{L}(v)\}|-|\{u\in \mathcal{M} : l=\mathcal{L}(u)\}|$,
Madarasi@19
   687
where $l$ is a label and $\mathcal{M}\subseteq V_{1}$.
alpar@2
   688
\end{notation}
alpar@2
   689
Madarasi@28
   690
\begin{definition}Let $\mathbf{arg\ max}_{f}(S) :=\{u\in S : f(u)=max_{v\in S}\{f(v)\}\}$ and $\mathbf{arg\ min}_{f}(S) := arg\ max_{(-f)}(S)$, where $S$ is a finite set and $f:S\longrightarrow \mathbb{R}$.
alpar@2
   691
\end{definition}
alpar@2
   692
Madarasi@28
   693
\begin{notation}
Madarasi@28
   694
Let $deg(v)$ denote the degree of node $v$.
Madarasi@28
   695
\end{notation}
Madarasi@28
   696
Madarasi@28
   697
\begin{algorithm}[H]
Madarasi@8
   698
\algtext*{EndIf}
Madarasi@8
   699
\algtext*{EndProcedure}
alpar@2
   700
\algtext*{EndWhile}
Madarasi@13
   701
\algtext*{EndFor}
alpar@2
   702
\caption{\hspace{0.5cm}$The\ method\ of\ VF2++\ for\ determining\ the\ node\ order$}\label{alg:VF2PPPseu}
alpar@2
   703
\begin{algorithmic}[1]
alpar@3
   704
\Procedure{VF2++order}{} \State $\mathcal{M}$ := $\emptyset$
Madarasi@19
   705
\Comment{matching order} \While{$V_{1}\backslash \mathcal{M}
alpar@3
   706
  \neq\emptyset$} \State $r\in$ arg max$_{deg}$ (arg
Madarasi@19
   707
min$_{F_\mathcal{M}\circ \mathcal{L}}(V_{1}\backslash
alpar@3
   708
\mathcal{M})$)\label{alg:findMin} \State Compute $T$, a BFS tree with
alpar@3
   709
root node $r$.  \For{$d=0,1,...,depth(T)$} \State $V_d$:=nodes of the
Madarasi@28
   710
$d$-th level \State Process $V_d$ \Comment{See Algorithm~\ref{alg:VF2PPProcess1}} \EndFor
alpar@3
   711
\EndWhile \EndProcedure
alpar@2
   712
\end{algorithmic}
alpar@2
   713
\end{algorithm}
alpar@2
   714
alpar@2
   715
\begin{algorithm}
Madarasi@8
   716
\algtext*{EndIf}
Madarasi@8
   717
\algtext*{EndProcedure}%ne nyomtasson ..
alpar@2
   718
\algtext*{EndWhile}
Madarasi@8
   719
\caption{\hspace{.5cm}$The\ method\ for\ processing\ a\ level\ of\ the\ BFS\ tree$}\label{alg:VF2PPProcess1}
alpar@2
   720
\begin{algorithmic}[1]
Madarasi@17
   721
\Procedure{VF2++ProcessLevel}{$V_{d}$} \While{$V_d\neq\emptyset$}
Madarasi@28
   722
\State $m\in$ arg min$_{F_\mathcal{M}\circ\ \mathcal{L}}($ arg max$_{deg}($arg
alpar@3
   723
max$_{Conn_{\mathcal{M}}}(V_{d})))$ \State $V_d:=V_d\backslash m$
alpar@3
   724
\State Append node $m$ to the end of $\mathcal{M}$ \State Refresh
alpar@3
   725
$F_\mathcal{M}$ \EndWhile \EndProcedure
alpar@2
   726
\end{algorithmic}
alpar@2
   727
\end{algorithm}
alpar@2
   728
Madarasi@28
   729
Algorithm~\ref{alg:VF2PPPseu} is a high-level description of the
alpar@4
   730
matching order procedure of VF2++. It computes a BFS tree for each
Madarasi@19
   731
component in ascending order of their rarest node labels and largest $deg$,
Madarasi@28
   732
whose root vertex is the minimal node of its component. Algorithm~\ref{alg:VF2PPProcess1} is a method to process a level of the BFS tree, which appends the nodes of the current level in descending
Madarasi@8
   733
lexicographic order by $(Conn_{\mathcal{M}},deg,-F_\mathcal{M})$ separately
Madarasi@8
   734
to $\mathcal{M}$, and refreshes $F_\mathcal{M}$ immediately.
alpar@2
   735
Madarasi@28
   736
\begin{claim}
Madarasi@28
   737
Algorithm~\ref{alg:VF2PPPseu} provides a matching order.
Madarasi@28
   738
\end{claim}
alpar@2
   739
alpar@2
   740
alpar@2
   741
\subsection{Cutting rules}
alpar@2
   742
\label{VF2PPCuttingRules}
Madarasi@19
   743
This section presents the cutting rules of VF2++, which are improved by using extra information coming from the node labels.
alpar@2
   744
\begin{notation}
Madarasi@19
   745
Let $\mathbf{\Gamma_{1}^{l}(u)}:=\{\tilde{u} : \mathcal{L}(\tilde{u})=l
Madarasi@19
   746
\wedge \tilde{u}\in \Gamma_{1} (u)\}$ and
Madarasi@19
   747
$\mathbf{\Gamma_{2}^{l}(v)}:=\{\tilde{v} : \mathcal{L}(\tilde{v})=l \wedge
Madarasi@19
   748
\tilde{v}\in \Gamma_{2} (v)\}$, where $u\in V_{1}$, $v\in
Madarasi@19
   749
V_{2}$ and $l$ is a label.
alpar@2
   750
\end{notation}
alpar@2
   751
Madarasi@28
   752
\begin{claim}[Cutting function for ISO]
Madarasi@28
   753
\[LabCut_{ISO}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})|\!\neq\!|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})|\  \vee\]\[\bigvee_{l\ is\ label} \newline |\Gamma_{2}^{l}(v)\cap \tilde{T}_{2}(\mathfrak{m})| \neq |\Gamma_{1}^{l}(u)\cap \tilde{T}_{1}(\mathfrak{m})|\] is a cutting function for ISO.
alpar@2
   754
\end{claim}
Madarasi@13
   755
Madarasi@28
   756
\begin{claim}[Cutting function for IND]
Madarasi@28
   757
\[LabCut_{IND}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})|\!<\!|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})|\ \vee\]\[\bigvee_{l\ is\ label} \newline |\Gamma_{2}^{l}(v)\cap \tilde{T}_{2}(\mathfrak{m})| < |\Gamma_{1}^{l}(u)\cap \tilde{T}_{1}(\mathfrak{m})|\] is a cutting function for IND.
Madarasi@28
   758
\end{claim}
Madarasi@28
   759
Madarasi@28
   760
\begin{claim}[Cutting function for SUB]
Madarasi@28
   761
\[LabCut_{SU\!B}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})|\!<\!|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})|\] is a cutting function for SUB.
Madarasi@19
   762
\end{claim}
alpar@2
   763
Madarasi@19
   764
Madarasi@19
   765
Madarasi@22
   766
\section{Implementation details}\label{sec:VF2ppImpl}
alpar@3
   767
This section provides a detailed summary of an efficient
alpar@3
   768
implementation of VF2++.
Madarasi@28
   769
\begin{notation}
Madarasi@28
   770
Let $\Delta_1$ and $\Delta_2$ denote the largest degree in $G_1$ and $G_2$, respectively, and let $\Delta=\max\{\Delta_1,\Delta_2\}$.
Madarasi@28
   771
\end{notation}
Madarasi@22
   772
\subsection{Storing a mapping}
alpar@3
   773
After fixing an arbitrary node order ($u_0, u_1, ..,
Madarasi@28
   774
u_{|V_{1}|-1}$) of $G_{1}$, an array $M$ can be used to store
alpar@3
   775
the current mapping in the following way.
alpar@2
   776
\[
alpar@3
   777
 M[i] =
alpar@2
   778
  \begin{cases} 
Madarasi@19
   779
   v & if\ (u_i,v)\ is\ in\ the\ mapping\\ INV\!ALI\!D &
Madarasi@17
   780
   if\ no\ node\ has\ been\ mapped\ to\ u_i,
alpar@2
   781
  \end{cases}
alpar@2
   782
\]
Madarasi@28
   783
where $i\in\{0,1, ..,|V_{1}|-1\}$, $v\in V_{2}$ and $INV\!ALI\!D$
alpar@3
   784
means "no node".
Madarasi@22
   785
\subsection{Avoiding the recurrence}
alpar@4
   786
The recursion of Algorithm~\ref{alg:VF2Pseu} can be realized
Madarasi@9
   787
as a \textit{while loop}, which has a loop counter $depth$ denoting the
Madarasi@28
   788
current depth of the recursion. Fixing a matching order, let $M$
Madarasi@28
   789
denote the array storing the current mapping. Observe that
Madarasi@19
   790
$M$ is $INV\!ALI\!D$ from index $depth$+1 and not $INV\!ALI\!D$ before
Madarasi@9
   791
$depth$. $M[depth]$ changes
alpar@3
   792
while the state is being processed, but the property is held before
alpar@3
   793
both stepping back to a predecessor state and exploring a successor
alpar@3
   794
state.
alpar@2
   795
alpar@29
   796
The necessary part of the candidate set is easy to maintain or
alpar@29
   797
compute by following
Madarasi@28
   798
the steps described in Section~\ref{candidateComputingVF2}. A much faster method
Madarasi@28
   799
has been designed for biological and sparse graphs, see the next
alpar@3
   800
section for details.
alpar@2
   801
Madarasi@22
   802
\subsection{Calculating the candidates for a node}
Madarasi@28
   803
The task is not to maintain the candidate set, but to generate the
Madarasi@19
   804
candidate nodes in $G_{2}$ for a given node $u\in V_{1}$.  In
Madarasi@20
   805
case of any of the three problem types and a mapping $\mathfrak{m}$, if a node $v\in
Madarasi@19
   806
V_{2}$ is a potential pair of $u\in V_{1}$, then $\forall
Madarasi@20
   807
u'\in \mathfrak{D}(\mathfrak{m}) : (u,u')\in
Madarasi@20
   808
E_{1}\Rightarrow (v,\mathfrak{m}(u'))\in
alpar@29
   809
E_{2}$. That is, each covered neighbor of $u$ has to be mapped to
alpar@29
   810
a covered neighbor of $v$, i.e. selecting arbitrarily a covered neighbor $u'$ of $u$, all of the admissible candidates for $u$ are among the neighbors of $\mathfrak{m}(u')$.
alpar@2
   811
Madarasi@28
   812
Having said that, an algorithm running in $\Theta(\Delta_2)$ time is
alpar@3
   813
describable if there exists a covered node in the component containing
Madarasi@17
   814
$u$, and a linear one otherwise.
alpar@2
   815
alpar@2
   816
Madarasi@22
   817
\subsection{Determining the node order}
alpar@3
   818
This section describes how the node order preprocessing method of
alpar@3
   819
VF2++ can efficiently be implemented.
alpar@2
   820
alpar@3
   821
For using lookup tables, the node labels are associated with the
alpar@3
   822
numbers $\{0,1,..,|K|-1\}$, where $K$ is the set of the labels. It
Madarasi@9
   823
enables $F_\mathcal{M}$ to be stored in an array. At first, the node order
alpar@3
   824
$\mathcal{M}=\emptyset$, so $F_\mathcal{M}[i]$ is the number of nodes
Madarasi@28
   825
in $V_{2}$ having label $i$, which is easy to compute in
Madarasi@28
   826
$\Theta(|V_{2}|)$ steps.
alpar@2
   827
Madarasi@19
   828
Representing $\mathcal{M}\subseteq V_{1}$ as an array of
Madarasi@28
   829
size $|V_{1}|$, both the computation of the BFS tree, and processing its levels by Algorithm~\ref{alg:VF2PPProcess1} can be done in-place by swapping nodes.
alpar@2
   830
Madarasi@22
   831
\subsection{Cutting rules}
alpar@29
   832
Section~\ref{VF2PPCuttingRules} described the cutting rules
alpar@29
   833
using the sets $T_{1}$, $T_{2}$, $\tilde T_{1}$
alpar@29
   834
and $\tilde T_{2}$, which are dependent on the current mapping. The aim is to check the labeled cutting
Madarasi@28
   835
rules of VF2++ in $\Theta(\Delta)$ time.
alpar@2
   836
alpar@29
   837
Firstly, suppose that these four sets are given a way, that
alpar@3
   838
checking whether a node is in a certain set takes constant time,
alpar@3
   839
e.g. they are given by their 0-1 characteristic vectors. Let $L$ be an
alpar@3
   840
initially zero integer lookup table of size $|K|$. After incrementing
Madarasi@19
   841
$L[\mathcal{L}(u')]$ for all $u'\in \Gamma_{1}(u) \cap T_{1}(\mathfrak{m})$ and
Madarasi@19
   842
decrementing $L[\mathcal{L}(v')]$ for all $v'\in\Gamma_{2} (v) \cap
alpar@29
   843
T_{2}(\mathfrak{m})$, the first part of the cutting rules can be checked in
Madarasi@28
   844
$\Theta(\Delta)$ time by considering the proper signs of $L$. Setting $L$
Madarasi@28
   845
to zero takes $\Theta(\Delta)$ time again, which makes it possible to use
Madarasi@9
   846
the same table through the whole algorithm. The second part of the
alpar@3
   847
cutting rules can be verified using the same method with $\tilde
Madarasi@19
   848
T_{1}$ and $\tilde T_{2}$ instead of $T_{1}$ and
Madarasi@28
   849
$T_{2}$. Thus, the overall time complexity is $\Theta(\Delta)$.
alpar@2
   850
Madarasi@28
   851
To maintain the sets $T_{1}$, $T_{2}$, $\tilde T_{1}$
Madarasi@28
   852
and $\tilde T_{2}$, two other integer lookup tables storing the number of covered neighbours of the nodes of the two graphs can be used. This representation allows constant-time membership checking, furthermore it is maintainable in $\Theta(\Delta)$ time whenever a node pair is added or subtracted by incrementing
alpar@3
   853
or decrementing the proper indices. A further improvement is that the
Madarasi@19
   854
values of $L[\mathcal{L}(u')]$ in case of checking $u$ are dependent only on
Madarasi@28
   855
$u$, i.e. on the current depth of the recursion, so for each $u\in V_{1}$, an array of pairs \textit{(label, number of such labels)} can store $L$. Note that these arrays are at most of size
Madarasi@28
   856
$\Delta_1$ if pairs with non-appearing node labels are discarded.
alpar@2
   857
Madarasi@19
   858
Using similar techniques, the consistency function can be evaluated in
Madarasi@28
   859
$\Theta(\Delta)$ steps, as well.
alpar@2
   860
Madarasi@22
   861
\section{Experimental results}\label{sec:ExpRes}
Madarasi@28
   862
This section compares the performance of VF2++ and VF2~Plus. According to
Madarasi@19
   863
our experience, both algorithms run faster than VF2 with orders of
Madarasi@19
   864
magnitude, thus its inclusion was not reasonable.
alpar@2
   865
alpar@29
   866
The algorithms were implemented in C++ using the open-source LEMON
alpar@29
   867
graph and network optimization
alpar@29
   868
library~\cite{LEMON}\cite{o11:_lemon_open_sourc_c_graph_templ_librar}. The
alpar@29
   869
tests were carried out on a Linux-based system with an Intel i7 X980
alpar@29
   870
3.33 GHz CPU and 6 GB of RAM.
alpar@29
   871
alpar@2
   872
\subsection{Biological graphs}
alpar@3
   873
The tests have been executed on a recent biological dataset created
alpar@3
   874
for the International Contest on Pattern Search in Biological
Madarasi@28
   875
Databases~\cite{Content}, which has been constructed of molecule,
Madarasi@7
   876
protein and contact map graphs extracted from the Protein Data
Madarasi@28
   877
Bank~\cite{ProteinDataBank}.
alpar@2
   878
alpar@3
   879
The molecule dataset contains small graphs with less than 100 nodes
alpar@3
   880
and an average degree of less than 3. The protein dataset contains
alpar@3
   881
graphs having 500-10 000 nodes and an average degree of 4, while the
alpar@3
   882
contact map dataset contains graphs with 150-800 nodes and an average
alpar@3
   883
degree of 20.  \\
alpar@2
   884
Madarasi@28
   885
In the following, both the induced subgraph and the graph
Madarasi@28
   886
isomorphism problems will be examined.
alpar@29
   887
This dataset provides graph pairs, between which all the induced subgraph isomorphisms have to be found. For the running times, please see Figure~\ref{fig:bioIND}.
Madarasi@7
   888
Madarasi@7
   889
In an other experiment, the nodes of each graph in the database had been
Madarasi@7
   890
shuffled, and an isomorphism between the shuffled and the original
Madarasi@28
   891
graph was searched. The running times are shown on Figure~\ref{fig:bioISO}.
Madarasi@7
   892
Madarasi@7
   893
Madarasi@17
   894
\begin{figure}[H]
Madarasi@17
   895
\vspace*{-2cm}
Madarasi@17
   896
\hspace*{-1.5cm}
Madarasi@17
   897
\begin{subfigure}[b]{0.55\textwidth}
Madarasi@17
   898
\begin{figure}[H]
Madarasi@17
   899
\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@28
   900
\begin{axis}[title=Molecules IND,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@17
   901
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@17
   902
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
   903
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
   904
  format/1000 sep = \kern 0.08em}]
Madarasi@17
   905
%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@17
   906
\addplot table {Orig/Molecules.32.txt}; \addplot[mark=triangle*,mark
Madarasi@17
   907
  size=1.8pt,color=red] table {VF2PPLabel/Molecules.32.txt};
Madarasi@17
   908
\end{axis}
Madarasi@17
   909
\end{tikzpicture}
Madarasi@17
   910
\caption{In the case of molecules, the algorithms have
Madarasi@17
   911
  similar behaviour, but VF2++ is almost two times faster even on such
Madarasi@17
   912
  small graphs.} \label{fig:INDMolecule}
Madarasi@17
   913
\end{figure}
Madarasi@17
   914
\end{subfigure}
Madarasi@17
   915
\hspace*{1.5cm}
Madarasi@17
   916
\begin{subfigure}[b]{0.55\textwidth}
Madarasi@17
   917
\begin{figure}[H]
Madarasi@17
   918
\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@28
   919
\begin{axis}[title=Contact maps IND,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@17
   920
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@17
   921
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
   922
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
   923
  format/1000 sep = \kern 0.08em}]
Madarasi@17
   924
%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@17
   925
\addplot table {Orig/ContactMaps.128.txt};
Madarasi@17
   926
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
Madarasi@17
   927
        {VF2PPLabel/ContactMaps.128.txt};
Madarasi@17
   928
\end{axis}
Madarasi@17
   929
\end{tikzpicture}
Madarasi@17
   930
\caption{On contact maps, VF2++ runs almost in constant time, while VF2
alpar@29
   931
  Plus has a near-linear behavior.} \label{fig:INDContact}
Madarasi@17
   932
\end{figure}
Madarasi@17
   933
\end{subfigure}
Madarasi@17
   934
Madarasi@17
   935
\begin{center}
Madarasi@17
   936
\vspace*{-0.5cm}
Madarasi@17
   937
\begin{subfigure}[b]{0.55\textwidth}
Madarasi@17
   938
\begin{figure}[H]
Madarasi@17
   939
\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@28
   940
  \begin{axis}[title=Proteins IND,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@17
   941
  =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@17
   942
    west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
   943
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
   944
  format/1000 sep = \kern 0.08em}] %\addplot+[only marks] table
Madarasi@17
   945
    {proteinsOrig.txt}; \addplot[mark=*,mark size=1.2pt,color=blue]
Madarasi@17
   946
    table {Orig/Proteins.256.txt}; \addplot[mark=triangle*,mark
Madarasi@17
   947
      size=1.8pt,color=red] table {VF2PPLabel/Proteins.256.txt};
Madarasi@17
   948
  \end{axis}
Madarasi@17
   949
  \end{tikzpicture}
Madarasi@28
   950
\caption{Both of the algorithms have linear behaviour on protein
Madarasi@17
   951
  graphs. VF2++ is more than 10 times faster than VF2
Madarasi@17
   952
  Plus.} \label{fig:INDProt}
Madarasi@17
   953
\end{figure}
Madarasi@17
   954
\end{subfigure}
Madarasi@17
   955
\end{center}
Madarasi@17
   956
\vspace*{-0.5cm}
Madarasi@28
   957
\caption{\normalsize{Induced subgraph isomorphism problem on biological graphs}}\label{fig:bioIND}
Madarasi@17
   958
\end{figure}
Madarasi@17
   959
alpar@2
   960
alpar@2
   961
\begin{figure}[H]
Madarasi@7
   962
\vspace*{-2cm}
Madarasi@7
   963
\hspace*{-1.5cm}
Madarasi@7
   964
\begin{subfigure}[b]{0.55\textwidth}
Madarasi@7
   965
\begin{figure}[H]
Madarasi@7
   966
\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@28
   967
\begin{axis}[title=Molecules ISO,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@7
   968
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@7
   969
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
   970
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
   971
  format/1000 sep = \kern 0.08em}]
Madarasi@7
   972
%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@7
   973
\addplot table {Orig/moleculesIso.txt}; \addplot[mark=triangle*,mark
Madarasi@7
   974
  size=1.8pt,color=red] table {VF2PPLabel/moleculesIso.txt};
Madarasi@7
   975
\end{axis}
Madarasi@7
   976
\end{tikzpicture}
Madarasi@28
   977
\caption{The results are close to each other on contact maps, but VF2++ seems to be slightly faster as the number of nodes increases.
Madarasi@28
   978
  }\label{fig:ISOMolecule}
Madarasi@7
   979
\end{figure}
Madarasi@7
   980
\end{subfigure}
Madarasi@7
   981
\hspace*{1.5cm}
Madarasi@7
   982
\begin{subfigure}[b]{0.55\textwidth}
Madarasi@7
   983
\begin{figure}[H]
Madarasi@7
   984
\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@28
   985
\begin{axis}[title=Contact maps ISO,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@7
   986
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@7
   987
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
   988
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
   989
  format/1000 sep = \kern 0.08em}]
Madarasi@7
   990
%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@7
   991
\addplot table {Orig/contactMapsIso.txt}; \addplot[mark=triangle*,mark
Madarasi@7
   992
  size=1.8pt,color=red] table {VF2PPLabel/contactMapsIso.txt};
Madarasi@7
   993
\end{axis}
Madarasi@7
   994
\end{tikzpicture}
Madarasi@28
   995
\caption{In the case of molecules, there is no significant
Madarasi@28
   996
  difference, but VF2++ performs consistently better.}\label{fig:ISOContact}
Madarasi@7
   997
\end{figure}
Madarasi@7
   998
\end{subfigure}
Madarasi@7
   999
alpar@2
  1000
\begin{center}
Madarasi@7
  1001
\vspace*{-0.5cm}
Madarasi@7
  1002
\begin{subfigure}[b]{0.55\textwidth}
Madarasi@7
  1003
\begin{figure}[H]
Madarasi@7
  1004
\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@28
  1005
\begin{axis}[title=Proteins ISO,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@7
  1006
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@7
  1007
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
  1008
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
  1009
  format/1000 sep = \kern 0.08em}]
Madarasi@7
  1010
%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@7
  1011
\addplot table {Orig/proteinsIso.txt}; \addplot[mark=triangle*,mark
Madarasi@7
  1012
  size=1.8pt,color=red] table {VF2PPLabel/proteinsIso.txt};
Madarasi@7
  1013
\end{axis}
Madarasi@7
  1014
\end{tikzpicture}
Madarasi@28
  1015
\caption{On protein graphs, VF2~Plus has a super linear time
Madarasi@7
  1016
  complexity, while VF2++ runs in near constant time. The difference
Madarasi@28
  1017
  is about two orders of magnitude on large graphs.}\label{fig:ISOProt}
Madarasi@7
  1018
\end{figure}
Madarasi@7
  1019
\end{subfigure}
Madarasi@7
  1020
\end{center}
Madarasi@7
  1021
\vspace*{-0.6cm}
Madarasi@28
  1022
\caption{\normalsize{Graph isomorphism problem on biological graphs}}\label{fig:bioISO}
Madarasi@7
  1023
\end{figure}
Madarasi@7
  1024
Madarasi@7
  1025
alpar@2
  1026
alpar@2
  1027
alpar@2
  1028
\subsection{Random graphs}
Madarasi@28
  1029
This section compares VF2++ with VF2~Plus on random graphs of large
alpar@3
  1030
size. The node labels are uniformly distributed.  Let $\delta$ denote
alpar@3
  1031
the average degree.  For the parameters of problems solved in the
alpar@3
  1032
experiments, please see the top of each chart.
Madarasi@28
  1033
\subsubsection{Graph isomorphism problem}
alpar@3
  1034
To evaluate the efficiency of the algorithms in the case of graph
Madarasi@28
  1035
isomorphism problem, random connected graphs of less than 20 000 nodes have been
alpar@3
  1036
considered. Generating a random graph and shuffling its nodes, an
alpar@29
  1037
isomorphism had to be found. Figure~\ref{fig:randISO} shows the running times
alpar@4
  1038
on graph sets of various density.
alpar@2
  1039
Madarasi@7
  1040
Madarasi@7
  1041
Madarasi@7
  1042
Madarasi@12
  1043
\begin{figure}
Madarasi@7
  1044
\vspace*{-1.5cm}
Madarasi@7
  1045
\hspace*{-1.5cm}
Madarasi@7
  1046
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1047
\begin{center}
alpar@2
  1048
\begin{tikzpicture}
Madarasi@28
  1049
\begin{axis}[title={Random ISO, $\delta = 5$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1050
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1051
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
  1052
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
  1053
  format/1000 sep = \kern 0.08em}]
alpar@2
  1054
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1055
\addplot table {randGraph/iso/vf2pIso5_1.txt};
alpar@3
  1056
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1057
        {randGraph/iso/vf2ppIso5_1.txt};
alpar@2
  1058
\end{axis}
alpar@2
  1059
\end{tikzpicture}
alpar@2
  1060
\end{center}
Madarasi@7
  1061
\end{subfigure}
Madarasi@7
  1062
%\hspace{1cm}
Madarasi@7
  1063
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1064
\begin{center}
alpar@2
  1065
\begin{tikzpicture}
Madarasi@28
  1066
\begin{axis}[title={Random ISO, $\delta = 10$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1067
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1068
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
  1069
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
  1070
  format/1000 sep = \kern 0.08em}]
alpar@2
  1071
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1072
\addplot table {randGraph/iso/vf2pIso10_1.txt};
alpar@3
  1073
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1074
        {randGraph/iso/vf2ppIso10_1.txt};
alpar@2
  1075
\end{axis}
alpar@2
  1076
\end{tikzpicture}
alpar@2
  1077
\end{center}
Madarasi@7
  1078
\end{subfigure}
Madarasi@7
  1079
%%\hspace{1cm}
Madarasi@7
  1080
\hspace*{-1.5cm}
Madarasi@7
  1081
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1082
\begin{center}
alpar@2
  1083
\begin{tikzpicture}
Madarasi@28
  1084
\begin{axis}[title={Random ISO, $\delta = 15$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1085
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1086
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
  1087
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
  1088
  format/1000 sep = \kern 0.08em}]
alpar@2
  1089
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1090
\addplot table {randGraph/iso/vf2pIso15_1.txt};
alpar@3
  1091
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1092
        {randGraph/iso/vf2ppIso15_1.txt};
alpar@2
  1093
\end{axis}
alpar@2
  1094
\end{tikzpicture}
alpar@2
  1095
\end{center}
Madarasi@7
  1096
     \end{subfigure}
Madarasi@7
  1097
     \begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1098
\begin{center}
alpar@2
  1099
\begin{tikzpicture}
Madarasi@28
  1100
\begin{axis}[title={Random ISO, $\delta = 100$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1101
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1102
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
  1103
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
  1104
  format/1000 sep = \kern 0.08em}]
alpar@2
  1105
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1106
\addplot table {randGraph/iso/vf2pIso100_1.txt};
alpar@3
  1107
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1108
        {randGraph/iso/vf2ppIso100_1.txt};
alpar@2
  1109
\end{axis}
alpar@2
  1110
\end{tikzpicture}
Madarasi@23
  1111
\end{center}
Madarasi@7
  1112
\end{subfigure}
alpar@2
  1113
\vspace*{-0.8cm}
Madarasi@28
  1114
\caption{Graph isomorphism problem on random graphs
Madarasi@23
  1115
}\label{fig:randISO}
alpar@2
  1116
\end{figure}
alpar@2
  1117
alpar@2
  1118
Madarasi@28
  1119
\subsubsection{Induced subgraph isomorphism problem}
Madarasi@28
  1120
This section presents a comparison of VF2++ and VF2~Plus in the case
Madarasi@28
  1121
of induced subgraph isomorphism problem. In addition to the size of graph $G_2$, that of $G_1$ dramatically influences the hardness of
Madarasi@28
  1122
a given problem too, so the overall picture is provided by examining graphs to be embedded of various size.
alpar@2
  1123
Madarasi@17
  1124
For each chart, a number $0<\rho< 1$ has been fixed, and the following
Madarasi@19
  1125
has been executed 150 times. Generating a large graph $G_{2}$ of an average degree of $\delta$,
Madarasi@28
  1126
choose 10 of its induced subgraphs having $\rho|V_{2}|$ nodes,
Madarasi@28
  1127
and for all the 10 subgraphs find a mapping by using both graph
alpar@3
  1128
matching algorithms.  The $\delta = 5, 10, 35$ and $\rho = 0.05, 0.1,
Madarasi@23
  1129
0.3, 0.8$ cases have been examined, see
Madarasi@28
  1130
Figure~\ref{fig:randIND5},~\ref{fig:randIND10}~and~\ref{fig:randIND35}.
alpar@2
  1131
alpar@2
  1132
alpar@2
  1133
alpar@2
  1134
alpar@2
  1135
Madarasi@12
  1136
\begin{figure}
Madarasi@7
  1137
\vspace*{-1.5cm}
Madarasi@7
  1138
\hspace*{-1.5cm}
alpar@2
  1139
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1140
\begin{center}
alpar@2
  1141
\begin{tikzpicture}
Madarasi@28
  1142
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1143
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1144
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
  1145
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
  1146
  format/1000 sep = \kern 0.08em}]
alpar@2
  1147
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1148
\addplot table {randGraph/ind/vf2pInd5_0.05.txt};
alpar@3
  1149
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1150
        {randGraph/ind/vf2ppInd5_0.05.txt};
alpar@2
  1151
\end{axis}
alpar@2
  1152
\end{tikzpicture}
alpar@2
  1153
\end{center}
alpar@2
  1154
     \end{subfigure}
alpar@2
  1155
     \begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1156
\begin{center}
alpar@2
  1157
\begin{tikzpicture}
Madarasi@28
  1158
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1159
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1160
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
  1161
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
  1162
  format/1000 sep = \kern 0.08em}]
alpar@2
  1163
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1164
\addplot table {randGraph/ind/vf2pInd5_0.1.txt};
alpar@3
  1165
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1166
        {randGraph/ind/vf2ppInd5_0.1.txt};
alpar@2
  1167
\end{axis}
alpar@2
  1168
\end{tikzpicture}
alpar@2
  1169
\end{center}
alpar@2
  1170
\end{subfigure}
Madarasi@7
  1171
\hspace*{-1.5cm}
alpar@2
  1172
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1173
\begin{center}
alpar@2
  1174
\begin{tikzpicture}
Madarasi@28
  1175
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1176
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1177
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
  1178
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
  1179
  format/1000 sep = \kern 0.08em}]
alpar@2
  1180
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1181
\addplot table {randGraph/ind/vf2pInd5_0.3.txt};
alpar@3
  1182
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1183
        {randGraph/ind/vf2ppInd5_0.3.txt};
alpar@2
  1184
\end{axis}
alpar@2
  1185
\end{tikzpicture}
alpar@2
  1186
\end{center}
alpar@2
  1187
     \end{subfigure}
alpar@2
  1188
     \begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1189
\begin{center}
alpar@2
  1190
\begin{tikzpicture}
Madarasi@28
  1191
\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1192
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1193
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
  1194
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
  1195
  format/1000 sep = \kern 0.08em}]
alpar@2
  1196
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1197
\addplot table {randGraph/ind/vf2pInd5_0.8.txt};
alpar@3
  1198
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1199
        {randGraph/ind/vf2ppInd5_0.8.txt};
alpar@2
  1200
\end{axis}
alpar@2
  1201
\end{tikzpicture}
Madarasi@23
  1202
\end{center}
alpar@2
  1203
\end{subfigure}
alpar@2
  1204
\vspace*{-0.8cm}
Madarasi@28
  1205
\caption{Induced subgraph isomorphism problem on random graphs having an average degree of
Madarasi@28
  1206
  5}\label{fig:randIND5}
alpar@2
  1207
\end{figure}
alpar@2
  1208
alpar@2
  1209
Madarasi@23
  1210
\begin{figure}
Madarasi@7
  1211
\vspace*{-1.5cm}
Madarasi@7
  1212
\hspace*{-1.5cm}
alpar@2
  1213
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1214
\begin{center}
Madarasi@7
  1215
\hspace*{-0.5cm}
alpar@2
  1216
\begin{tikzpicture}
Madarasi@28
  1217
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1218
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1219
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
  1220
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
  1221
  format/1000 sep = \kern 0.08em}]
alpar@2
  1222
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1223
\addplot table {randGraph/ind/vf2pInd10_0.05.txt};
alpar@3
  1224
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1225
        {randGraph/ind/vf2ppInd10_0.05.txt};
alpar@2
  1226
\end{axis}
alpar@2
  1227
\end{tikzpicture}
alpar@2
  1228
\end{center}
alpar@2
  1229
     \end{subfigure}
alpar@2
  1230
     \begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1231
\begin{center}
Madarasi@7
  1232
     \hspace*{-0.5cm}
alpar@2
  1233
\begin{tikzpicture}
Madarasi@28
  1234
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1235
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1236
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
  1237
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
  1238
  format/1000 sep = \kern 0.08em}]
alpar@2
  1239
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1240
\addplot table {randGraph/ind/vf2pInd10_0.1.txt};
alpar@3
  1241
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1242
        {randGraph/ind/vf2ppInd10_0.1.txt};
alpar@2
  1243
\end{axis}
alpar@2
  1244
\end{tikzpicture}
alpar@2
  1245
\end{center}
alpar@2
  1246
\end{subfigure}
Madarasi@7
  1247
\hspace*{-1.5cm}
alpar@2
  1248
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1249
\begin{center}
alpar@2
  1250
\begin{tikzpicture}
Madarasi@28
  1251
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1252
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1253
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
  1254
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
  1255
  format/1000 sep = \kern 0.08em}]
alpar@2
  1256
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1257
\addplot table {randGraph/ind/vf2pInd10_0.3.txt};
alpar@3
  1258
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1259
        {randGraph/ind/vf2ppInd10_0.3.txt};
alpar@2
  1260
\end{axis}
alpar@2
  1261
\end{tikzpicture}
alpar@2
  1262
\end{center}
alpar@2
  1263
     \end{subfigure}
alpar@2
  1264
     \begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1265
\begin{center}
alpar@2
  1266
\begin{tikzpicture}
Madarasi@28
  1267
\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1268
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1269
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
  1270
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
  1271
  format/1000 sep = \kern 0.08em}]
alpar@2
  1272
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1273
\addplot table {randGraph/ind/vf2pInd10_0.8.txt};
alpar@3
  1274
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1275
        {randGraph/ind/vf2ppInd10_0.8.txt};
alpar@2
  1276
\end{axis}
alpar@2
  1277
\end{tikzpicture}
Madarasi@23
  1278
\end{center}
alpar@2
  1279
\end{subfigure}
alpar@2
  1280
\vspace*{-0.8cm}
Madarasi@28
  1281
\caption{Induced subgraph isomorphism problem on random graphs having an average degree of
Madarasi@28
  1282
  10}\label{fig:randIND10}
alpar@2
  1283
\end{figure}
alpar@2
  1284
alpar@2
  1285
alpar@2
  1286
Madarasi@23
  1287
\begin{figure}
Madarasi@7
  1288
\vspace*{-1.5cm}
Madarasi@7
  1289
\hspace*{-1.5cm}
alpar@2
  1290
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1291
\begin{center}
alpar@2
  1292
\begin{tikzpicture}
Madarasi@28
  1293
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1294
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1295
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
  1296
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
  1297
  format/1000 sep = \kern 0.08em}]
alpar@2
  1298
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1299
\addplot table {randGraph/ind/vf2pInd35_0.05.txt};
alpar@3
  1300
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1301
        {randGraph/ind/vf2ppInd35_0.05.txt};
alpar@2
  1302
\end{axis}
alpar@2
  1303
\end{tikzpicture}
alpar@2
  1304
\end{center}
alpar@2
  1305
     \end{subfigure}
alpar@2
  1306
     \begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1307
\begin{center}
alpar@2
  1308
\begin{tikzpicture}
Madarasi@28
  1309
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1310
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1311
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
  1312
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
  1313
  format/1000 sep = \kern 0.08em}]
alpar@2
  1314
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1315
\addplot table {randGraph/ind/vf2pInd35_0.1.txt};
alpar@3
  1316
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1317
        {randGraph/ind/vf2ppInd35_0.1.txt};
alpar@2
  1318
\end{axis}
alpar@2
  1319
\end{tikzpicture}
alpar@2
  1320
\end{center}
alpar@2
  1321
\end{subfigure}
Madarasi@7
  1322
\hspace*{-1.5cm}
alpar@2
  1323
\begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1324
\begin{center}
alpar@2
  1325
\begin{tikzpicture}
Madarasi@28
  1326
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1327
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1328
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
  1329
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
  1330
  format/1000 sep = \kern 0.08em}]
alpar@2
  1331
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1332
\addplot table {randGraph/ind/vf2pInd35_0.3.txt};
alpar@3
  1333
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1334
        {randGraph/ind/vf2ppInd35_0.3.txt};
alpar@2
  1335
\end{axis}
alpar@2
  1336
\end{tikzpicture}
alpar@2
  1337
\end{center}
alpar@2
  1338
     \end{subfigure}
alpar@2
  1339
     \begin{subfigure}[b]{0.55\textwidth}
alpar@2
  1340
\begin{center}
alpar@2
  1341
\begin{tikzpicture}
Madarasi@28
  1342
\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3
  1343
=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3
  1344
  west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28
  1345
  format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28
  1346
  format/1000 sep = \kern 0.08em}]
alpar@2
  1347
%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2
  1348
\addplot table {randGraph/ind/vf2pInd35_0.8.txt};
alpar@3
  1349
\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3
  1350
        {randGraph/ind/vf2ppInd35_0.8.txt};
alpar@2
  1351
\end{axis}
alpar@2
  1352
\end{tikzpicture}
Madarasi@23
  1353
\end{center}
alpar@2
  1354
\end{subfigure}
alpar@2
  1355
\vspace*{-0.8cm}
Madarasi@28
  1356
\caption{Induced subgraph isomorphism problem on random graphs having an average degree of
Madarasi@28
  1357
  35}\label{fig:randIND35}
alpar@2
  1358
\end{figure}
alpar@2
  1359
alpar@2
  1360
alpar@29
  1361
As the experiments above demonstrates, VF2++ is faster than VF2~Plus and able to
alpar@3
  1362
handle really large graphs in milliseconds. Note that when $IND$ was
Madarasi@28
  1363
considered and the graph to be embedded had proportionally few nodes ($\rho =
Madarasi@28
  1364
0.05$, or $\rho = 0.1$), then VF2~Plus produced some inefficient node
alpar@4
  1365
orders (e.g. see the $\delta=10$ case on
Madarasi@17
  1366
Figure~\ref{fig:randIND10}). If these instances had been excluded, the
Madarasi@28
  1367
charts would have looked similarly to the other ones.
alpar@3
  1368
Unsurprisingly, as denser graphs are considered, both VF2++ and VF2
alpar@29
  1369
Plus slow down slightly, but remain practically usable even on graphs
alpar@3
  1370
having 10 000 nodes.
alpar@2
  1371
alpar@2
  1372
alpar@2
  1373
alpar@2
  1374
alpar@3
  1375
alpar@2
  1376
\section{Conclusion}
alpar@29
  1377
This paper presented VF2++, a new graph matching algorithm based on VF2, and analyzed it from a practical point of view.
alpar@2
  1378
alpar@3
  1379
Recognizing the importance of the node order and determining an
alpar@3
  1380
efficient one, VF2++ is able to match graphs of thousands of nodes in
alpar@3
  1381
near practically linear time including preprocessing. In addition to
Madarasi@28
  1382
the proper order, VF2++ uses more efficient cutting
alpar@29
  1383
rules, which are easy to compute and make the algorithm able to prune
alpar@3
  1384
most of the unfruitful branches without going astray.
alpar@2
  1385
alpar@3
  1386
In order to show the efficiency of the new method, it has been
Madarasi@28
  1387
compared to VF2~Plus~\cite{VF2Plus}, which is the best contemporary algorithm.
alpar@2
  1388
Madarasi@28
  1389
The experiments show that VF2++ consistently outperforms VF2~Plus on
alpar@3
  1390
biological graphs. It seems to be asymptotically faster on protein and
Madarasi@28
  1391
on contact map graphs in the case of induced subgraph isomorphism problem,
Madarasi@28
  1392
while in the case of graph isomorphism problem, it has definitely better
alpar@29
  1393
asymptotic behavior on protein graphs.
alpar@2
  1394
alpar@3
  1395
Regarding random sparse graphs, not only has VF2++ proved itself to be
alpar@29
  1396
faster than VF2~Plus, but it also has a practically linear behavior both
Madarasi@28
  1397
in the case of induced subgraph and graph isomorphism problems.
alpar@2
  1398
alpar@25
  1399
%%%%%%%%%%%%%%%%
alpar@25
  1400
\section*{Acknowledgement} \label{sec:ack}
alpar@25
  1401
%%%%%%%%%%%%%%%%
alpar@25
  1402
This research project was initiated and sponsored by QuantumBio
Madarasi@28
  1403
Inc.~\cite{QUANTUMBIO}.
alpar@25
  1404
alpar@25
  1405
The authors were supported by the Hungarian Scientific Research Fund -
alpar@25
  1406
OTKA, K109240 and by the J\'anos Bolyai Research Fellowship program of
alpar@25
  1407
the Hungarian Academy of Sciences.
alpar@2
  1408
alpar@0
  1409
alpar@0
  1410
%% The Appendices part is started with the command \appendix;
alpar@0
  1411
%% appendix sections are then done as normal sections
alpar@0
  1412
%% \appendix
alpar@0
  1413
alpar@0
  1414
%% \section{}
alpar@0
  1415
%% \label{}
alpar@0
  1416
alpar@0
  1417
%% If you have bibdatabase file and want bibtex to generate the
alpar@0
  1418
%% bibitems, please use
alpar@0
  1419
%%
alpar@3
  1420
\bibliographystyle{elsarticle-num} \bibliography{bibliography}
alpar@0
  1421
alpar@0
  1422
%% else use the following coding to input the bibitems directly in the
alpar@0
  1423
%% TeX file.
alpar@0
  1424
alpar@2
  1425
%% \begin{thebibliography}{00}
alpar@0
  1426
alpar@2
  1427
%% %% \bibitem{label}
alpar@2
  1428
%% %% Text of bibliographic item
alpar@0
  1429
alpar@2
  1430
%% \bibitem{}
alpar@0
  1431
alpar@2
  1432
%% \end{thebibliography}
alpar@2
  1433
alpar@0
  1434
\end{document}
alpar@0
  1435
\endinput
alpar@0
  1436
%%
alpar@0
  1437
%% End of file `elsarticle-template-num.tex'.
alpar@29
  1438
alpar@29
  1439
%%  LocalWords:  Subgraph Isomorphism datasets subgraphs subgraph FPT
alpar@29
  1440
%%  LocalWords:  isomorphism hypergraph Ullmann Nauty precompute BFS
alpar@29
  1441
%%  LocalWords:  bijection isomorphisms undirected infeasible dataset
alpar@29
  1442
%%  LocalWords:  preprocessing incrementing decrementing neighbours
alpar@29
  1443
%%  LocalWords:  expectable bioinformatics