damecco.tex
author Madarasi Peter
Sun, 23 Jul 2017 00:00:00 +0200
changeset 28 523fddfd7a01
parent 27 497868c58d36
child 29 0ff72a828b16
permissions -rw-r--r--
Meet Referee's requests
     1 %% 
     2 %% Copyright 2007, 2008, 2009 Elsevier Ltd
     3 %% 
     4 %% This file is part of the 'Elsarticle Bundle'.
     5 %% ---------------------------------------------
     6 %% 
     7 %% It may be distributed under the conditions of the LaTeX Project Public
     8 %% License, either version 1.2 of this license or (at your option) any
     9 %% later version.  The latest version of this license is in
    10 %%    http://www.latex-project.org/lppl.txt
    11 %% and version 1.2 or later is part of all distributions of LaTeX
    12 %% version 1999/12/01 or later.
    13 %% 
    14 %% The list of all files belonging to the 'Elsarticle Bundle' is
    15 %% given in the file `manifest.txt'.
    16 %% 
    17 
    18 %% Template article for Elsevier's document class `elsarticle'
    19 %% with numbered style bibliographic references
    20 %% SP 2008/03/01
    21 
    22 \documentclass[preprint,12pt]{elsarticle}
    23 
    24 %% Use the option review to obtain double line spacing
    25 %% \documentclass[authoryear,preprint,review,12pt]{elsarticle}
    26 
    27 %% Use the options 1p,twocolumn; 3p; 3p,twocolumn; 5p; or 5p,twocolumn
    28 %% for a journal layout:
    29 %% \documentclass[final,1p,times]{elsarticle}
    30 %% \documentclass[final,1p,times,twocolumn]{elsarticle}
    31 %% \documentclass[final,3p,times]{elsarticle}
    32 %% \documentclass[final,3p,times,twocolumn]{elsarticle}
    33 %% \documentclass[final,5p,times]{elsarticle}
    34 %% \documentclass[final,5p,times,twocolumn]{elsarticle}
    35 
    36 %% For including figures, graphicx.sty has been loaded in
    37 %% elsarticle.cls. If you prefer to use the old commands
    38 %% please give \usepackage{epsfig}
    39 
    40 %% The amssymb package provides various useful mathematical symbols
    41 \usepackage{amssymb}
    42 %% The amsthm package provides extended theorem environments
    43 %% \usepackage{amsthm}
    44 
    45 %% The lineno packages adds line numbers. Start line numbering with
    46 %% \begin{linenumbers}, end it with \end{linenumbers}. Or switch it on
    47 %% for the whole article with \linenumbers.
    48 %% \usepackage{lineno}
    49 
    50 \usepackage{amsmath}
    51 %% \usepackage[pdftex]{graphicx}
    52 
    53 \usepackage{pgfplots}
    54 \pgfplotsset{width=9cm}
    55 \pgfplotsset{compat=1.8}
    56 
    57 \usepackage{caption}
    58 \usepackage{subcaption} 
    59 
    60 \usepackage{algorithm}
    61 \usepackage{algpseudocode}
    62 \usepackage{tikz}
    63 
    64 \usepackage{amsthm,amssymb}
    65 \renewcommand{\qedsymbol}{\rule{0.7em}{0.7em}}
    66 
    67 \newtheorem{theorem}{Theorem}[subsection]
    68 \newtheorem{corollary}{Corollary}[theorem]
    69 \newtheorem{claim}[theorem]{Claim}
    70 
    71 \newtheorem{definition}{Definition}[subsection]
    72 \newtheorem{notation}{Notation}[subsection]
    73 \newtheorem{example}{Example}[subsection]
    74 \usetikzlibrary{decorations.markings}
    75 \let\oldproofname=\proofname
    76 %% \renewcommand{\proofname}{\rm\bf{Proof:}}
    77 
    78 \captionsetup{font=normalsize}
    79 
    80 \journal{Discrete Applied Mathematics}
    81 
    82 \begin{document}
    83 
    84 \begin{frontmatter}
    85 
    86 %% Title, authors and addresses
    87 
    88 %% use the tnoteref command within \title for footnotes;
    89 %% use the tnotetext command for theassociated footnote;
    90 %% use the fnref command within \author or \address for footnotes;
    91 %% use the fntext command for theassociated footnote;
    92 %% use the corref command within \author for corresponding author footnotes;
    93 %% use the cortext command for theassociated footnote;
    94 %% use the ead command for the email address,
    95 %% and the form \ead[url] for the home page:
    96 %% \title{Title\tnoteref{label1}}
    97 %% \tnotetext[label1]{}
    98 %% \author{Name\corref{cor1}\fnref{label2}}
    99 %% \ead{email address}
   100 %% \ead[url]{home page}
   101 %% \fntext[label2]{}
   102 %% \cortext[cor1]{}
   103 %% \address{Address\fnref{label3}}
   104 %% \fntext[label3]{}
   105 
   106 \title{VF2++ --- An Improved Subgraph Isomorphism Algorithm}
   107 
   108 %% use optional labels to link authors explicitly to addresses:
   109 %% \author[label1,label2]{}
   110 %% \address[label1]{}
   111 %% \address[label2]{}
   112 
   113 \author[egres,elte]{Alp{\'a}r J{\"u}ttner}
   114 \ead{alpar@cs.elte.hu}
   115 \author[elte]{P{\'e}ter Madarasi}
   116 \ead{madarasip@caesar.elte.hu}
   117 \address[egres]{MTA-ELTE Egerv{\'a}ry Research Group, Budapest, Hungary.}
   118 \address[elte]{Department of Operations Research, E{\"o}tv{\"o}s Lor{\'a}nd University, Budapest, Hungary.}
   119 
   120 \begin{abstract}
   121 
   122   This paper presents a largely improved version of the VF2 algorithm
   123   for the \emph{Subgraph Isomorphism Problem}. The improvements are
   124   twofold. Firstly, it is based on a new approach for determining the
   125   matching order of the nodes, and secondly, more efficient -
   126   nevertheless easier to compute - cutting rules significantly
   127   reducing the search space are applied.
   128 
   129   In addition to the usual \emph{Subgraph Isomorphism Problem}, the paper also
   130   presents specialized algorithms for the \emph{Induced Subgraph
   131     Isomorphism} and for the \emph{Graph Isomorphism Problems}.
   132 
   133   Finally, an extensive experimental evaluation is provided using a
   134   wide range of inputs, including both real-life biological and
   135   chemical datasets and standard randomly generated graph series. The
   136   results show major and consistent running time improvements over the
   137   other known methods.
   138  
   139   The C++ implementations of the algorithms are available open-source as 
   140   part of the LEMON graph and network optimization library.
   141   
   142 \end{abstract}
   143 
   144 \begin{keyword}
   145   Computational Biology, Subgraph Isomorphism Problem
   146 %% keywords here, in the form: keyword \sep keyword
   147 
   148 %% PACS codes here, in the form: \PACS code \sep code
   149 
   150 %% MSC codes here, in the form: \MSC code \sep code
   151 %% or \MSC[2008] code \sep code (2000 is the default)
   152 
   153 \end{keyword}
   154 
   155 \end{frontmatter}
   156 
   157 %% \linenumbers
   158 
   159 %% main text
   160 \section{Introduction}
   161 \label{sec:intro}
   162 
   163 In the last decades, combinatorial structures, and especially graphs
   164 have been considered with ever increasing interest, and applied to the
   165 solution of several new and revised questions.  The expressiveness,
   166 the simplicity and the studiedness of graphs make them practical for
   167 modelling and appear constantly in several seemingly independent
   168 fields, such as bioinformatics and chemistry.
   169 
   170 Complex biological systems arise from the interaction and cooperation
   171 of plenty of molecular components. Getting acquainted with such
   172 systems at the molecular level is of primary importance, since
   173 protein-protein interaction, DNA-protein interaction, metabolic
   174 interaction, transcription factor binding, neuronal networks, and
   175 hormone signalling networks can be understood this way.
   176 
   177 Many chemical and biological structures can easily be modelled
   178 as graphs, for instance, a molecular structure can be
   179 considered as a graph, whose nodes correspond to atoms and whose
   180 edges to chemical bonds. The similarity and dissimilarity of
   181 objects corresponding to nodes are incorporated to the model
   182 by \emph{node labels}. Understanding such networks basically
   183 requires finding specific subgraphs, thus it calls for efficient
   184 graph matching algorithms.
   185 
   186 Other real-world fields related to some
   187 variants of graph matching include pattern recognition
   188 and machine vision~\cite{HorstBunkeApplications}, symbol recognition~\cite{CordellaVentoSymbolRecognition}, and face identification~\cite{JianzhuangYongFaceIdentification}.  \\
   189 
   190 Subgraph and induced subgraph matching problems are known to be
   191 NP-Complete~\cite{SubgraphNPC}, while the graph isomorphism problem is
   192 one of the few problems in NP neither known to be in P nor
   193 NP-Complete. Although polynomial-time isomorphism algorithms are known
   194 for various graph classes, like trees and planar
   195 graphs~\cite{PlanarGraphIso}, bounded valence
   196 graphs~\cite{BondedDegGraphIso}, interval graphs~\cite{IntervalGraphIso}
   197 or permutation graphs~\cite{PermGraphIso}. Furthermore, an FPT algorithm has also been presented for the coloured hypergraph isomorphism problem in~\cite{ColoredHiperGraphIso}.
   198 
   199 In the following, some algorithms based on other approaches are
   200 summarized, which do not need any restrictions on the graphs. Even though,
   201 an overall polynomial behaviour is not expectable from such an
   202 alternative, it may often have good practical performance, in fact,
   203 it might be the best choice even on a graph class for which polynomial
   204 algorithm is known.
   205 
   206 The first practically usable approach was due to
   207 \emph{Ullmann}~\cite{Ullmann}, which is a commonly used algorithm based on depth-first
   208 search with a complex heuristic for reducing the
   209 number of visited states. A major problem is its $\Theta(n^3)$ space
   210 complexity, which makes it impractical in the case of big sparse
   211 graphs.
   212 
   213 In a recent paper, Ullmann~\cite{UllmannBit} presents an
   214 improved version of this algorithm based on a bit-vector solution for
   215 the binary Constraint Satisfaction Problem.
   216 
   217 The \emph{Nauty} algorithm~\cite{Nauty} transforms the two graphs to
   218 a canonical form before starting to check for isomorphism. It has
   219 been considered as one of the fastest graph isomorphism algorithms,
   220 although graph categories were shown in which it takes exponentially
   221 many steps. This algorithm handles only the graph isomorphism problem.
   222 
   223 The \emph{LAD} algorithm~\cite{Lad} uses a depth-first search
   224 strategy and formulates the matching as a Constraint Satisfaction
   225 Problem to prune the search tree. The constraints are that the mapping
   226 has to be injective and edge-preserving, hence it is possible to
   227 handle new matching types as well.
   228 
   229 The \emph{RI} algorithm~\cite{RI} and its variations are based on a
   230 state space representation. After reordering the nodes of the graphs,
   231 it uses some fast executable heuristic checks without using any
   232 complex pruning rules. It seems to run really efficiently on graphs
   233 coming from biology, and won the International Contest on Pattern
   234 Search in Biological Databases~\cite{Content}.
   235 
   236 Currently, the most commonly used algorithm is the
   237 \emph{VF2}~\cite{VF2}, the improved version of \emph{VF}~\cite{VF}, which was
   238 designed for solving pattern matching and computer vision problems,
   239 and has been one of the best overall algorithms for more than a
   240 decade. Although, it is not as fast as some of the new specialized algorithms, it is still widely used due to its simplicity and space efficiency. VF2 uses
   241 a state space representation and checks some conditions in each state
   242 to prune the search tree.
   243 
   244 Meanwhile, another variant called \emph{VF2~Plus}~\cite{VF2Plus} has
   245 been published. It is considered to be as efficient as the RI
   246 algorithm and has a strictly better behaviour on large graphs.  The
   247 main idea of VF2~Plus is to precompute a heuristic node order of the graph to be embedded, in which VF2 works more efficiently.
   248 
   249 This paper introduces \emph{VF2++}, a new further improved algorithm
   250 for the graph and (induced) subgraph isomorphism problems, which uses
   251 efficient cutting rules and determines a node order in which VF2 runs
   252 significantly faster on practical inputs.
   253 
   254 The rest of the paper is structured as
   255 follows. Section~\ref{sec:ProbStat} defines the exact problems to be
   256 solved, Section~\ref{sec:VF2Alg} provides a description of VF2. Based
   257 on that, Section~\ref{sec:VF2ppAlg} introduces VF2++. Some technical
   258 details necessary for an efficient implementation are discussed in
   259 Section~\ref{sec:VF2ppImpl}.  Finally, Section~\ref{sec:ExpRes}
   260 provides a detailed experimental evaluation of VF2++ and its comparison
   261 to the state-of-the-art algorithm.
   262 
   263 It must also be mentioned that the C++ implementations of the
   264 algorithms have been made available for evaluation and use under an
   265 open-source license as a part of LEMON~\cite{LEMON} graph
   266 library.
   267 
   268 \section{Problem Statement}\label{sec:ProbStat}
   269 This section provides a formal description of the problems to be
   270 solved.
   271 \subsection{Definitions}
   272 
   273 Throughout the paper $G_{1}=(V_{1}, E_{1})$ and
   274 $G_{2}=(V_{2}, E_{2})$ denote two undirected graphs.
   275 
   276 \begin{definition}
   277 $\mathcal{L}: (V_{1}\cup V_{2}) \longrightarrow K$ is a \textbf{node
   278     label function}, where K is an arbitrary set. The elements in K
   279   are the \textbf{node labels}. Two nodes, u and v are said to be
   280   \textbf{equivalent} if $\mathcal{L}(u)=\mathcal{L}(v)$.
   281 \end{definition}
   282 
   283 For the sake of simplicity, in this paper the graph, subgraph and induced subgraph isomorphisms are defined in a more general way.
   284 
   285 \begin{definition}\label{sec:ismorphic}
   286 $G_{1}$ and $G_{2}$ are \textbf{isomorphic} (by the node label $\mathcal{L}$) if $\exists \mathfrak{m}:
   287   V_{1} \longrightarrow V_{2}$ bijection, for which the
   288   following is true:
   289 \begin{center}
   290 $\forall u\in{V_{1}} : \mathcal{L}(u)=\mathcal{L}(\mathfrak{m}(u))$ and\\
   291 $\forall u,v\in{V_{1}} : (u,v)\in{E_{1}} \Leftrightarrow (\mathfrak{m}(u),\mathfrak{m}(v))\in{E_{2}}$
   292 \end{center}
   293 \end{definition}
   294 
   295 \begin{definition}
   296 $G_{1}$ is a \textbf{subgraph} of $G_{2}$ (by the node label $\mathcal{L}$) if $\exists \mathfrak{m}:
   297   V_{1}\longrightarrow V_{2}$ injection, for which the
   298   following is true:
   299 \begin{center}
   300 $\forall u\in{V_{1}} : \mathcal{L}(u)=\mathcal{L}(\mathfrak{m}(u))$ and\\
   301 $\forall u,v \in{V_{1}} : (u,v)\in{E_{1}} \Rightarrow (\mathfrak{m}(u),\mathfrak{m}(v))\in E_{2}$
   302 \end{center}
   303 \end{definition}
   304 
   305 \begin{definition} 
   306 $G_{1}$ is an \textbf{induced subgraph} of $G_{2}$ (by the node label $\mathcal{L}$) if $\exists
   307   \mathfrak{m}: V_{1}\longrightarrow V_{2}$ injection, for which the
   308   following is true:
   309 \begin{center}
   310 $\forall u\in{V_{1}} : \mathcal{L}(u)=\mathcal{L}(\mathfrak{m}(u))$ and
   311 
   312 $\forall u,v \in{V_{1}} : (u,v)\in{E_{1}} \Leftrightarrow
   313   (\mathfrak{m}(u),\mathfrak{m}(v))\in E_{2}$
   314 \end{center}
   315 \end{definition}
   316 
   317 
   318 \subsection{Common problems}\label{sec:CommProb}
   319 
   320 The focus of this paper is on the following problems appearing in many applications.
   321 
   322 The \textbf{subgraph isomorphism problem} is the following: is
   323 $G_{1}$ isomorphic to any subgraph of $G_{2}$ by a given node
   324 label?
   325 
   326 The \textbf{induced subgraph isomorphism problem} asks the same about the
   327 existence of an induced subgraph.
   328 
   329 The \textbf{graph isomorphism problem} can be defined as induced
   330 subgraph matching problem where the sizes of the two graphs are equal.
   331 
   332 In addition, one may want to find a \textbf{single} embedding or \textbf{enumerate} all of them.
   333 
   334 \section{The VF2 Algorithm}\label{sec:VF2Alg}
   335 This algorithm is the basis of both the VF2++ and the VF2~Plus.  VF2
   336 is able to handle all the variations mentioned in Section~\ref{sec:CommProb}.  Although it can also handle directed graphs,
   337 for the sake of simplicity, only the undirected case will be
   338 discussed.
   339 
   340 
   341 \subsection{Common notations}
   342 \indent Assume $G_{1}$ is searched in $G_{2}$.  The following
   343 definitions and notations will be used throughout this paper.
   344 \begin{definition}
   345 An injection $\mathfrak{m} : D \longrightarrow V_2$ is called (partial) \textbf{mapping}, where $D\subseteq V_1$.
   346 \end{definition}
   347 
   348 \begin{notation}
   349 $\mathfrak{D}(f)$ and $\mathfrak{R}(f)$ denote the domain and the range of a function $f$, respectively.
   350 \end{notation}
   351 
   352 \begin{definition}
   353 Mapping $\mathfrak{m}$ \textbf{covers} a node $u\in V_1\cup V_2$ if $u\in \mathfrak{D}(\mathfrak{m})\cup \mathfrak{R}(\mathfrak{m})$.
   354 \end{definition}
   355 
   356 \begin{definition}
   357 A mapping $\mathfrak{m}$ is $\mathbf{whole\ mapping}$ if $\mathfrak{m}$ covers all the
   358 nodes of $V_{1}$, i.e. $\mathfrak{D}(\mathfrak{m})=V_1$.
   359 \end{definition}
   360 
   361 \begin{definition}
   362 Let \textbf{extend}$(\mathfrak{m},(u,v))$ denote the function $f : \mathfrak{D}(\mathfrak{m})\cup\{u\}\longrightarrow\mathfrak{R}(\mathfrak{m})\cup\{v\}$, for which $\forall w\in \mathfrak{D}(\mathfrak{m}) : f(w)=\mathfrak{m}(w)$ and $f(u)=v$ holds, where $u\in V_1\setminus\mathfrak{D}(\mathfrak{m})$ and $v\in V_2\setminus\mathfrak{R}(\mathfrak{m})$; otherwise $extend(\mathfrak{m},(u,v))$ is undefined.
   363 \end{definition}
   364 
   365 \begin{notation}
   366 Throughout the paper, $\mathbf{PT}$ denotes a generic problem type
   367 which can be substituted by any of the $\mathbf{SUB}$, $\mathbf{IND}$
   368 and $\mathbf{ISO}$ problems, which stand for the the problems mentioned in Section~\ref{sec:CommProb} respectively.
   369 \end{notation}
   370 
   371 \begin{definition}
   372 Let $\mathfrak{m}$ be a mapping. The \textbf{consistency function for } $\mathbf{PT}$ is a logical function, for which $\mathbf{Cons_{PT}}(\mathfrak{m})$ is true if and only if $\mathfrak{m}$ satisfies the requirements of $\mathbf{PT}$ considering the subgraphs of $G_{1}$ and $G_{2}$ induced by $\mathfrak{D}(\mathfrak{m})$ and $\mathfrak{R}(\mathfrak{m})$, respectively.
   373 
   374 %$\mathbf{Cons_{PT}}(\mathfrak{m})$ is true if there exists a whole mapping $w$ satisfying the requirements of $PT$, for which $\mathfrak{m}$ is exactly $w$ restricted to $\mathfrak{D}(\mathfrak{m})$.
   375 \end{definition}
   376 
   377 \begin{definition} 
   378 Let $\mathfrak{m}$ be a mapping. A logical function $\mathbf{Cut_{PT}}$ is a
   379 \textbf{cutting function for } $\mathbf{PT}$ if the following
   380 holds. $\mathbf{Cut_{PT}(\mathfrak{m})}$ is false if there exists a sequence of extend operations, which results in a whole mapping satisfying the requirements of $PT$.
   381 \end{definition}
   382 
   383 \begin{definition}
   384 A mapping $\mathfrak{m}$ is said to be \textbf{consistent mapping by} $\mathbf{PT}$ if
   385   $Cons_{PT}(\mathfrak{m})$ is true.
   386 \end{definition}
   387 
   388 $Cons_{PT}$ and $Cut_{PT}$ will often be used in the following form.
   389 \begin{notation}
   390 Let $\mathbf{Cons_{PT}(p, \mathfrak{m})}:=Cons_{PT}(extend(\mathfrak{m},p))$, and
   391 $\mathbf{Cut_{PT}(p, \mathfrak{m})}:=Cut_{PT}(extend(\mathfrak{m},p))$, where
   392 $p\in{V_{1}\backslash\mathfrak{D}(\mathfrak{m}) \!\times\!V_{2}\backslash\mathfrak{R}(\mathfrak{m})}$.
   393 \end{notation}
   394 
   395 $Cons_{PT}$ will be used to check the consistency of the already
   396 covered nodes, while $Cut_{PT}$ is for looking ahead to recognize if
   397 no whole consistent mapping can contain the current mapping.
   398 
   399 \subsection{Overview of the algorithm}
   400 
   401 VF2 begins with an empty mapping and gradually extends it with respect to the consistency and cutting functions until a whole mapping is reached.
   402 
   403 Algorithm~\ref{alg:VF2Pseu} is a high-level description of
   404 the VF2 algorithm. Each state of the matching process can
   405 be associated with a mapping $\mathfrak{m}$. The initial state
   406 is associated with a mapping $\mathfrak{m}$, for which
   407 $\mathfrak{D}(\mathfrak{m})=\emptyset$, i.e. it starts with an empty mapping.
   408 
   409 
   410 \begin{algorithm}
   411 \algtext*{EndIf}%ne nyomtasson end if-et
   412 \algtext*{EndFor}%ne
   413 \algtext*{EndProcedure}%ne nyomtasson ..
   414 \caption{\hspace{0.5cm}$A\ high\ level\ description\ of\ VF2$}\label{alg:VF2Pseu}
   415 \begin{algorithmic}[1]
   416 
   417 \Procedure{VF2}{Mapping $\mathfrak{m}$, ProblemType $PT$}
   418   \If{$\mathfrak{m}$ covers
   419     $V_{1}$} \State Output($\mathfrak{m}$)
   420   \Else
   421   \State Compute the set $P_\mathfrak{m}$ of the candidate pairs for extending $\mathfrak{m}$ \ForAll{$p\in{P_\mathfrak{m}}$} \If{Cons$_{PT}$($p,\mathfrak{m}$) $\wedge$
   422     $\neg$Cut$_{PT}$($p,\mathfrak{m}$)}
   423     \State \textbf{call}
   424   VF2($extend(\mathfrak{m},p)$, $PT$) \EndIf \EndFor \EndIf \EndProcedure
   425 \end{algorithmic}
   426 \end{algorithm}
   427 
   428 
   429 For the current mapping $\mathfrak{m}$, the algorithm computes $P_\mathfrak{m}$, the set of
   430 candidate node pairs for extending the current mapping $\mathfrak{m}$.
   431 
   432 For each pair $p$ in $P_\mathfrak{m}$, $Cons_{PT}(p,\mathfrak{m})$ and
   433 $Cut_{PT}(p,\mathfrak{m})$ are evaluated. If the former is true and
   434 the latter is false, the whole process is recursively applied to
   435 $extend(\mathfrak{m},p)$. Otherwise, $extend(\mathfrak{m},p)$ is not consistent by $PT$, or it
   436 can be proved that $\mathfrak{m}$ can not be extended to a whole mapping.
   437 
   438 In order to make sure of the correctness, see Claim~\ref{claim:consMapps}.
   439 \begin{claim}\label{claim:consMapps}
   440 Through consistent mappings, only consistent whole mappings can be
   441 reached, and all the consistent whole mappings are reachable through
   442 consistent mappings.
   443 \end{claim}
   444 
   445 Note that a mapping may be reached in exponentially many different ways, since the
   446 order of extensions does not influence the nascent mapping.
   447 
   448 However, one may make the following observations.
   449 
   450 %\begin{claim}
   451 %\label{claim:claimTotOrd}
   452 %Let $\prec$ be an arbitrary total ordering relation on $V_{1}$.  If
   453 %the algorithm ignores each $p=(u,v) \in P_\mathfrak{m}$, for which
   454 %\begin{center}
   455 %$\exists (\tilde{u},\tilde{v})\in P_\mathfrak{m}: \tilde{u} \prec u$,
   456 %\end{center}
   457 %then no mapping can be reached more than once, and each whole mapping %remains reachable.
   458 %\end{claim}
   459 
   460 \begin{definition}
   461 A total order $(u_{\sigma(1)},u_{\sigma(2)},..,u_{\sigma(|V_{1}|)})$ of
   462 $V_{1}$ is \textbf{matching order} if VF2 can cover $u_{\sigma(d)}$ on the $d$-th level for all $d\in\{1,..,|V_{1}|\}$.
   463 \end{definition}
   464 
   465 \begin{claim}
   466 \label{claim:claimTotOrd}
   467 If VF2 is prescribed to cover the nodes of $G_1$ according to a matching order, then no mapping can be reached more than once and each whole mapping remains reachable.
   468 \end{claim}
   469 
   470 Note that the cornerstone of the improvements to VF2 is to choose a proper
   471 matching order.
   472 
   473 \subsection{The candidate set}
   474 \label{candidateComputingVF2}
   475 Let $P_\mathfrak{m}$ be the set of the candidate pairs for inclusion in $\mathfrak{m}$.
   476 
   477 \begin{notation}
   478 Let $\mathbf{T_{1}(\mathfrak{m})}:=\{u \in V_{1}\backslash\mathfrak{D}(\mathfrak{m}) : \exists \tilde{u}\in{\mathfrak{D}(\mathfrak{m}): (u,\tilde{u})\in E_{1}}\}$, and
   479  $\mathbf{T_{2}(\mathfrak{m})} := \{v \in V_{2}\backslash\mathfrak{R}(\mathfrak{m}) : \exists\tilde{v}\in{\mathfrak{R}(\mathfrak{m}):(v,\tilde{v})\in E_{2}}\}$.
   480 \end{notation}
   481 
   482 The set $P_\mathfrak{m}$ contains the pairs of uncovered neighbours of covered
   483 nodes, and if there is not such a node pair, all the pairs containing
   484 two uncovered nodes are added. Formally, let
   485 \[
   486  P_\mathfrak{m}\!=\!
   487   \begin{cases} 
   488    T_{1}(\mathfrak{m})\times T_{2}(\mathfrak{m})&\hspace{-0.15cm}\text{if }
   489    T_{1}(\mathfrak{m})\!\neq\!\emptyset\ \text{and }T_{2}(\mathfrak{m})\!\neq
   490    \emptyset,\\ (V_{1}\!\setminus\!\mathfrak{D}(\mathfrak{m}))\!\times\!(V_{2}\!\setminus\!\mathfrak{R}(\mathfrak{m}))
   491    &\hspace{-0.15cm}\text{otherwise}.
   492   \end{cases}
   493 \]
   494 
   495 \subsection{Consistency}
   496 Let $p=(u,v)\in V_{1}\times V_{2}$, and suppose $\mathfrak{m}$ is a consistent mapping by
   497 $PT$. $Cons_{PT}(p,\mathfrak{m})$ checks whether
   498 adding pair $p$ into $\mathfrak{m}$ leads to a consistent mapping by $PT$.
   499 
   500 For example, the consistency function of the induced subgraph isomorphism problem is the following.
   501 \begin{notation}
   502 Let $\mathbf{\Gamma_{1} (u)}:=\{\tilde{u}\in V_{1} :
   503 (u,\tilde{u})\in E_{1}\}$, and $\mathbf{\Gamma_{2}
   504   (v)}:=\{\tilde{v}\in V_{2} : (v,\tilde{v})\in E_{2}\}$, where $u\in V_{1}$ and $v\in V_{2}$. That is, $\mathbf{\Gamma_{i} (w)}$ denotes the set of neighbours of node $w$ in $G_i$ $(i=1,2)$.
   505 \end{notation}
   506 
   507 \begin{claim}
   508 $extend(\mathfrak{m},(u,v))$ is a consistent mapping by $IND$ if and only if $\mathfrak{m}$ is consistent and $(\forall \tilde{u}\in \mathfrak{D}(\mathfrak{m}): (u,\tilde{u})\in E_{1}
   509 \Leftrightarrow (v,\mathfrak{m}(\tilde{u}))\in E_{2})$. 
   510 \end{claim}
   511 
   512 The following formulation gives an efficient way of calculating $Cons_{IND}$.
   513 
   514 \begin{claim}
   515 $Cons_{IND}((u,v),\mathfrak{m}):=Cons_{IND}(\mathfrak{m})\wedge\mathcal{L}(u)\!\!=\!\!\mathcal{L}(v)\wedge(\forall \tilde{v}\in \Gamma_{2}(v)\cap\mathfrak{R}(\mathfrak{m}):(u,\mathfrak{m}^{-1}(\tilde{v}))\in E_{1})\wedge
   516   (\forall \tilde{u}\in \Gamma_{1}(u)
   517   \cap \mathfrak{D}(\mathfrak{m}):(v,\mathfrak{m}(\tilde{u}))\in E_{2})$ is the consistency function for $IND$.
   518 \end{claim}
   519 
   520 \subsection{Cutting rules}
   521 $Cut_{PT}(p,\mathfrak{m})$ is defined by a collection of efficiently
   522 verifiable conditions. The requirement is that $Cut_{PT}(p,\mathfrak{m})$ can
   523 be true only if it is impossible to extend $extend(\mathfrak{m},p)$ to a
   524 whole mapping.
   525 
   526 As an example, a cutting function of induced subgraph isomorphism problem is presented.
   527 \begin{notation}
   528 Let $\mathbf{\tilde{T}_{1}}(\mathfrak{m}):=(V_{1}\backslash
   529 \mathfrak{D}(\mathfrak{m}))\backslash T_{1}(\mathfrak{m})$, and
   530 \\ $\mathbf{\tilde{T}_{2}}(\mathfrak{m}):=(V_{2}\backslash
   531 \mathfrak{R}(\mathfrak{m}))\backslash T_{2}(\mathfrak{m})$.
   532 \end{notation}
   533 
   534 \begin{claim}
   535 $Cut_{IND}((u,v),\mathfrak{m}):= |\Gamma_{2} (v)\ \cap\ T_{2}(\mathfrak{m})| <
   536   |\Gamma_{1} (u)\ \cap\ T_{1}(\mathfrak{m})| \vee |\Gamma_{2}(v)\cap
   537   \tilde{T}_{2}(\mathfrak{m})| < |\Gamma_{1}(u)\cap
   538   \tilde{T}_{1}(\mathfrak{m})|$ is a cutting function for $IND$.
   539 \end{claim}
   540 
   541 \section{The VF2++ Algorithm}\label{sec:VF2ppAlg}
   542 Although any matching order makes the search space of VF2 a
   543 tree, its choice turns out to dramatically influence the number of
   544 visited states. The goal is to determine an efficient one as quickly
   545 as possible.
   546 
   547 The main reason for the superiority of VF2++ over VF2 is twofold. Firstly,
   548 taking into account the structure and the node labelling of the graph,
   549 VF2++ determines a matching order in which most of the unfruitful
   550 branches of the search space can be pruned immediately. Secondly,
   551 introducing more efficient --- nevertheless still easier to compute
   552 --- cutting rules reduces the chance of going astray even further.
   553 
   554 In addition to the usual subgraph isomorphism problem, specialized versions
   555 for induced subgraph and graph isomorphism problems have been
   556 designed.
   557 
   558 Note that a weaker version of the cutting rules of VF2++ and an efficient
   559 candidate set calculation method were described in~\cite{VF2Plus}.
   560 
   561 It should be noted that all the methods described in this section are
   562 extendable to handle directed graphs and edge labels as well.
   563 The basic ideas and the detailed description of VF2++ are provided in
   564 the following.\newline
   565 
   566 The goal is to find a matching order in which the algorithm is able to
   567 recognize inconsistency or prune the infeasible branches on the
   568 highest levels and goes deep only if it is needed.
   569 
   570 \begin{notation}
   571 Let $\mathbf{Conn_{H}(u)}:=|\Gamma_{1}(u)\cap H|$, that is the
   572 number of neighbours of u which are in H, where $u\in V_{1} $ and
   573 $H\subseteq V_{1}$.
   574 \end{notation}
   575 
   576 The principal question is the following. Suppose a mapping $\mathfrak{m}$ is
   577 given. For which node of $T_{1}(\mathfrak{m})$ is the hardest to find a
   578 consistent pair in $G_{2}$? The more covered neighbours a node in
   579 $T_{1}(\mathfrak{m})$ has --- i.e. the largest $Conn_{\mathfrak{D}(\mathfrak{m})}$ it has
   580 ---, the more rarely satisfiable consistency constraints for its pair
   581 are given.
   582 
   583 Most of the graphs of biological and chemical structures are sparse, thus several nodes in
   584 $T_{1}(\mathfrak{m})$ may have the same $Conn_{\mathfrak{D}(\mathfrak{m})}$, which makes
   585 reasonable to define a secondary and a tertiary order between them.
   586 The observation above proves itself to be as determining, that the
   587 secondary ordering prefers nodes with the most uncovered neighbours
   588 among which have the same $Conn_{\mathfrak{D}(\mathfrak{m})}$ to increase
   589 $Conn_{\mathfrak{D}(\mathfrak{m})}$ of uncovered nodes as much, as possible.  The tertiary ordering prefers nodes having the rarest uncovered labels in $G_2$.
   590 
   591 Note that the secondary ordering is the same as ordering by degrees,
   592 which is a static data in front of the above used.
   593 
   594 These rules can easily result in a matching order which contains the
   595 nodes of a long path successively, whose nodes may have low $Conn$ and
   596 is easily matchable into $G_{2}$. To try to avoid that, a Breadth-first-search order is used, and on each of its levels, the ordering procedure described above is applied.
   597 \newline
   598 
   599 In the following, some examples on which the VF2 may be slow are
   600 described, although they are easily solvable by using a proper
   601 matching order.
   602 
   603 \begin{example}
   604 Suppose $G_{1}$ can be mapped into $G_{2}$ in many ways
   605 without node labels. Let $u\in V_{1}$ and $v\in V_{2}$.
   606 \newline
   607 $\mathcal{L}(u):=black$
   608 \newline
   609 $\mathcal{L}(v):=black$
   610 \newline
   611 $\mathcal{L}(\tilde{u}):=red \ \forall \tilde{u}\in V_{1}\backslash
   612 \{u\}$
   613 \newline
   614 $\mathcal{L}(\tilde{v}):=red \ \forall \tilde{v}\in V_{2}\backslash
   615 \{v\}$
   616 
   617 Now, any mapping by $\mathcal{L}$ must contain $(u,v)$, since
   618 $u$ is black and no node in $V_{2}$ has a black label except
   619 $v$. If unfortunately $u$ were the last node which will get covered,
   620 VF2 would check only in the last steps, whether $u$ can be matched to
   621 $v$.
   622 
   623 However, had $u$ been the first matched node, u would have been
   624 matched immediately to v, so all the mappings would have been
   625 precluded in which node labels can not correspond.
   626 \end{example}
   627 
   628 \begin{example}
   629 Suppose there is no node label given, and $G_{1}$ is a small graph that can not be mapped into $G_{2}$ and $u\in V_{1}$.
   630 \newline
   631 Let $G'_{1}:=(V_{1}\cup
   632 \{u'_{1},u'_{2},..,u'_{k}\},E_{1}\cup
   633 \{(u,u'_{1}),(u'_{1},u'_{2}),..,(u'_{k-1},u'_{k})\})$, that is,
   634 $G'_{1}$ is $G_{1}\cup \{ a\ k$ long path, which is disjoint
   635 from $G_{1}$ and one of its starting points is connected to $u\in
   636 V_{1}\}$.
   637 
   638 If unfortunately the nodes of the path were the first $k$ nodes in the
   639 matching order, the algorithm would iterate through all the possible k
   640 long paths in $G_{2}$, and it would recognize that no path can be
   641 extended to $G'_{1}$.
   642 \newline
   643 However, had it started by the matching of $G_{1}$, it would not
   644 have matched any nodes of the path.
   645 \end{example}
   646 
   647 These examples may look artificial, but the same problems also appear
   648 in real-world instances, even though in a less obvious way.
   649 
   650 %\subsection{Preparations}
   651 %\begin{claim}
   652 %\label{claim:claimCoverFromLeft}
   653 %The total ordering relation uniquely determines a node order, in which
   654 %the nodes of $V_{1}$ will be covered by VF2. From the point of
   655 %view of the matching procedure, this means, that always the same node
   656 %of $G_{1}$ will be covered on the $d$-th level.
   657 %\end{claim}
   658 
   659 %\begin{definition}
   660 %An order $(u_{\sigma(1)},u_{\sigma(2)},..,u_{\sigma(|V_{1}|)})$ of
   661 %$V_{1}$ is \textbf{matching order} if there exists $\prec$ total
   662 %ordering relation, s.t. the VF2 with $\prec$ on the d-th level finds
   663 %pair for $u_{\sigma(d)}$ for all $d\in\{1,..,|V_{1}|\}$.
   664 %\end{definition}
   665 
   666 %\begin{claim}\label{claim:MOclaim}
   667 %A total ordering is matching order iff the nodes of every component
   668 %form an interval in the node sequence, and every node connects to a
   669 %previous node in its component except the first node of each component.
   670 %\end{claim}
   671 
   672 %In summary, a total ordering always uniquely determines a matching
   673 %order, and every matching order can be determined by a total ordering,
   674 %however, more than one different total orderings may determine the
   675 %same matching order.
   676 
   677 \subsection{Matching order}
   678 \begin{notation}
   679 Let \textbf{F$_\mathcal{M}$(l)}$:=|\{v\in V_{2} :
   680 l=\mathcal{L}(v)\}|-|\{u\in \mathcal{M} : l=\mathcal{L}(u)\}|$,
   681 where $l$ is a label and $\mathcal{M}\subseteq V_{1}$.
   682 \end{notation}
   683 
   684 \begin{definition}Let $\mathbf{arg\ max}_{f}(S) :=\{u\in S : f(u)=max_{v\in S}\{f(v)\}\}$ and $\mathbf{arg\ min}_{f}(S) := arg\ max_{(-f)}(S)$, where $S$ is a finite set and $f:S\longrightarrow \mathbb{R}$.
   685 \end{definition}
   686 
   687 \begin{notation}
   688 Let $deg(v)$ denote the degree of node $v$.
   689 \end{notation}
   690 
   691 \begin{algorithm}[H]
   692 \algtext*{EndIf}
   693 \algtext*{EndProcedure}
   694 \algtext*{EndWhile}
   695 \algtext*{EndFor}
   696 \caption{\hspace{0.5cm}$The\ method\ of\ VF2++\ for\ determining\ the\ node\ order$}\label{alg:VF2PPPseu}
   697 \begin{algorithmic}[1]
   698 \Procedure{VF2++order}{} \State $\mathcal{M}$ := $\emptyset$
   699 \Comment{matching order} \While{$V_{1}\backslash \mathcal{M}
   700   \neq\emptyset$} \State $r\in$ arg max$_{deg}$ (arg
   701 min$_{F_\mathcal{M}\circ \mathcal{L}}(V_{1}\backslash
   702 \mathcal{M})$)\label{alg:findMin} \State Compute $T$, a BFS tree with
   703 root node $r$.  \For{$d=0,1,...,depth(T)$} \State $V_d$:=nodes of the
   704 $d$-th level \State Process $V_d$ \Comment{See Algorithm~\ref{alg:VF2PPProcess1}} \EndFor
   705 \EndWhile \EndProcedure
   706 \end{algorithmic}
   707 \end{algorithm}
   708 
   709 \begin{algorithm}
   710 \algtext*{EndIf}
   711 \algtext*{EndProcedure}%ne nyomtasson ..
   712 \algtext*{EndWhile}
   713 \caption{\hspace{.5cm}$The\ method\ for\ processing\ a\ level\ of\ the\ BFS\ tree$}\label{alg:VF2PPProcess1}
   714 \begin{algorithmic}[1]
   715 \Procedure{VF2++ProcessLevel}{$V_{d}$} \While{$V_d\neq\emptyset$}
   716 \State $m\in$ arg min$_{F_\mathcal{M}\circ\ \mathcal{L}}($ arg max$_{deg}($arg
   717 max$_{Conn_{\mathcal{M}}}(V_{d})))$ \State $V_d:=V_d\backslash m$
   718 \State Append node $m$ to the end of $\mathcal{M}$ \State Refresh
   719 $F_\mathcal{M}$ \EndWhile \EndProcedure
   720 \end{algorithmic}
   721 \end{algorithm}
   722 
   723 Algorithm~\ref{alg:VF2PPPseu} is a high-level description of the
   724 matching order procedure of VF2++. It computes a BFS tree for each
   725 component in ascending order of their rarest node labels and largest $deg$,
   726 whose root vertex is the minimal node of its component. Algorithm~\ref{alg:VF2PPProcess1} is a method to process a level of the BFS tree, which appends the nodes of the current level in descending
   727 lexicographic order by $(Conn_{\mathcal{M}},deg,-F_\mathcal{M})$ separately
   728 to $\mathcal{M}$, and refreshes $F_\mathcal{M}$ immediately.
   729 
   730 \begin{claim}
   731 Algorithm~\ref{alg:VF2PPPseu} provides a matching order.
   732 \end{claim}
   733 
   734 
   735 \subsection{Cutting rules}
   736 \label{VF2PPCuttingRules}
   737 This section presents the cutting rules of VF2++, which are improved by using extra information coming from the node labels.
   738 \begin{notation}
   739 Let $\mathbf{\Gamma_{1}^{l}(u)}:=\{\tilde{u} : \mathcal{L}(\tilde{u})=l
   740 \wedge \tilde{u}\in \Gamma_{1} (u)\}$ and
   741 $\mathbf{\Gamma_{2}^{l}(v)}:=\{\tilde{v} : \mathcal{L}(\tilde{v})=l \wedge
   742 \tilde{v}\in \Gamma_{2} (v)\}$, where $u\in V_{1}$, $v\in
   743 V_{2}$ and $l$ is a label.
   744 \end{notation}
   745 
   746 \begin{claim}[Cutting function for ISO]
   747 \[LabCut_{ISO}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})|\!\neq\!|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})|\  \vee\]\[\bigvee_{l\ is\ label} \newline |\Gamma_{2}^{l}(v)\cap \tilde{T}_{2}(\mathfrak{m})| \neq |\Gamma_{1}^{l}(u)\cap \tilde{T}_{1}(\mathfrak{m})|\] is a cutting function for ISO.
   748 \end{claim}
   749 
   750 \begin{claim}[Cutting function for IND]
   751 \[LabCut_{IND}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})|\!<\!|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})|\ \vee\]\[\bigvee_{l\ is\ label} \newline |\Gamma_{2}^{l}(v)\cap \tilde{T}_{2}(\mathfrak{m})| < |\Gamma_{1}^{l}(u)\cap \tilde{T}_{1}(\mathfrak{m})|\] is a cutting function for IND.
   752 \end{claim}
   753 
   754 \begin{claim}[Cutting function for SUB]
   755 \[LabCut_{SU\!B}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})|\!<\!|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})|\] is a cutting function for SUB.
   756 \end{claim}
   757 
   758 
   759 
   760 \section{Implementation details}\label{sec:VF2ppImpl}
   761 This section provides a detailed summary of an efficient
   762 implementation of VF2++.
   763 \begin{notation}
   764 Let $\Delta_1$ and $\Delta_2$ denote the largest degree in $G_1$ and $G_2$, respectively, and let $\Delta=\max\{\Delta_1,\Delta_2\}$.
   765 \end{notation}
   766 \subsection{Storing a mapping}
   767 After fixing an arbitrary node order ($u_0, u_1, ..,
   768 u_{|V_{1}|-1}$) of $G_{1}$, an array $M$ can be used to store
   769 the current mapping in the following way.
   770 \[
   771  M[i] =
   772   \begin{cases} 
   773    v & if\ (u_i,v)\ is\ in\ the\ mapping\\ INV\!ALI\!D &
   774    if\ no\ node\ has\ been\ mapped\ to\ u_i,
   775   \end{cases}
   776 \]
   777 where $i\in\{0,1, ..,|V_{1}|-1\}$, $v\in V_{2}$ and $INV\!ALI\!D$
   778 means "no node".
   779 \subsection{Avoiding the recurrence}
   780 The recursion of Algorithm~\ref{alg:VF2Pseu} can be realized
   781 as a \textit{while loop}, which has a loop counter $depth$ denoting the
   782 current depth of the recursion. Fixing a matching order, let $M$
   783 denote the array storing the current mapping. Observe that
   784 $M$ is $INV\!ALI\!D$ from index $depth$+1 and not $INV\!ALI\!D$ before
   785 $depth$. $M[depth]$ changes
   786 while the state is being processed, but the property is held before
   787 both stepping back to a predecessor state and exploring a successor
   788 state.
   789 
   790 The necessary part of the candidate set is easily maintainable or
   791 computable by following
   792 the steps described in Section~\ref{candidateComputingVF2}. A much faster method
   793 has been designed for biological and sparse graphs, see the next
   794 section for details.
   795 
   796 \subsection{Calculating the candidates for a node}
   797 The task is not to maintain the candidate set, but to generate the
   798 candidate nodes in $G_{2}$ for a given node $u\in V_{1}$.  In
   799 case of any of the three problem types and a mapping $\mathfrak{m}$, if a node $v\in
   800 V_{2}$ is a potential pair of $u\in V_{1}$, then $\forall
   801 u'\in \mathfrak{D}(\mathfrak{m}) : (u,u')\in
   802 E_{1}\Rightarrow (v,\mathfrak{m}(u'))\in
   803 E_{2}$. That is, each covered neighbour of $u$ has to be mapped to
   804 a covered neighbour of $v$, i.e. selecting arbitrarily a covered neighbour $u'$ of $u$, all of the admissible candidates for $u$ are among the neighbours of $\mathfrak{m}(u')$.
   805 
   806 Having said that, an algorithm running in $\Theta(\Delta_2)$ time is
   807 describable if there exists a covered node in the component containing
   808 $u$, and a linear one otherwise.
   809 
   810 
   811 \subsection{Determining the node order}
   812 This section describes how the node order preprocessing method of
   813 VF2++ can efficiently be implemented.
   814 
   815 For using lookup tables, the node labels are associated with the
   816 numbers $\{0,1,..,|K|-1\}$, where $K$ is the set of the labels. It
   817 enables $F_\mathcal{M}$ to be stored in an array. At first, the node order
   818 $\mathcal{M}=\emptyset$, so $F_\mathcal{M}[i]$ is the number of nodes
   819 in $V_{2}$ having label $i$, which is easy to compute in
   820 $\Theta(|V_{2}|)$ steps.
   821 
   822 Representing $\mathcal{M}\subseteq V_{1}$ as an array of
   823 size $|V_{1}|$, both the computation of the BFS tree, and processing its levels by Algorithm~\ref{alg:VF2PPProcess1} can be done in-place by swapping nodes.
   824 
   825 \subsection{Cutting rules}
   826 In Section~\ref{VF2PPCuttingRules}, the cutting rules were
   827 described using the sets $T_{1}$, $T_{2}$, $\tilde T_{1}$
   828 and $\tilde T_{2}$, which are dependent on the current mapping. The aim is to check the labelled cutting
   829 rules of VF2++ in $\Theta(\Delta)$ time.
   830 
   831 Firstly, suppose that these four sets are given in such a way, that
   832 checking whether a node is in a certain set takes constant time,
   833 e.g. they are given by their 0-1 characteristic vectors. Let $L$ be an
   834 initially zero integer lookup table of size $|K|$. After incrementing
   835 $L[\mathcal{L}(u')]$ for all $u'\in \Gamma_{1}(u) \cap T_{1}(\mathfrak{m})$ and
   836 decrementing $L[\mathcal{L}(v')]$ for all $v'\in\Gamma_{2} (v) \cap
   837 T_{2}(\mathfrak{m})$, the first part of the cutting rules is checkable in
   838 $\Theta(\Delta)$ time by considering the proper signs of $L$. Setting $L$
   839 to zero takes $\Theta(\Delta)$ time again, which makes it possible to use
   840 the same table through the whole algorithm. The second part of the
   841 cutting rules can be verified using the same method with $\tilde
   842 T_{1}$ and $\tilde T_{2}$ instead of $T_{1}$ and
   843 $T_{2}$. Thus, the overall time complexity is $\Theta(\Delta)$.
   844 
   845 To maintain the sets $T_{1}$, $T_{2}$, $\tilde T_{1}$
   846 and $\tilde T_{2}$, two other integer lookup tables storing the number of covered neighbours of the nodes of the two graphs can be used. This representation allows constant-time membership checking, furthermore it is maintainable in $\Theta(\Delta)$ time whenever a node pair is added or subtracted by incrementing
   847 or decrementing the proper indices. A further improvement is that the
   848 values of $L[\mathcal{L}(u')]$ in case of checking $u$ are dependent only on
   849 $u$, i.e. on the current depth of the recursion, so for each $u\in V_{1}$, an array of pairs \textit{(label, number of such labels)} can store $L$. Note that these arrays are at most of size
   850 $\Delta_1$ if pairs with non-appearing node labels are discarded.
   851 
   852 Using similar techniques, the consistency function can be evaluated in
   853 $\Theta(\Delta)$ steps, as well.
   854 
   855 \section{Experimental results}\label{sec:ExpRes}
   856 This section compares the performance of VF2++ and VF2~Plus. According to
   857 our experience, both algorithms run faster than VF2 with orders of
   858 magnitude, thus its inclusion was not reasonable.
   859 
   860 The algorithms were implemented in C++ using the open-source
   861 LEMON graph and network optimization library~\cite{LEMON}. The tests were carried out on a Linux-based system with an Intel i7 X980 3.33 GHz CPU and 6 GB of RAM.
   862 \subsection{Biological graphs}
   863 The tests have been executed on a recent biological dataset created
   864 for the International Contest on Pattern Search in Biological
   865 Databases~\cite{Content}, which has been constructed of molecule,
   866 protein and contact map graphs extracted from the Protein Data
   867 Bank~\cite{ProteinDataBank}.
   868 
   869 The molecule dataset contains small graphs with less than 100 nodes
   870 and an average degree of less than 3. The protein dataset contains
   871 graphs having 500-10 000 nodes and an average degree of 4, while the
   872 contact map dataset contains graphs with 150-800 nodes and an average
   873 degree of 20.  \\
   874 
   875 In the following, both the induced subgraph and the graph
   876 isomorphism problems will be examined.
   877 This dataset provides graph pairs, between which all the induced subgraph isomorphisms have to be found. For runtime results, please see Figure~\ref{fig:bioIND}.
   878 
   879 In an other experiment, the nodes of each graph in the database had been
   880 shuffled, and an isomorphism between the shuffled and the original
   881 graph was searched. The running times are shown on Figure~\ref{fig:bioISO}.
   882 
   883 
   884 \begin{figure}[H]
   885 \vspace*{-2cm}
   886 \hspace*{-1.5cm}
   887 \begin{subfigure}[b]{0.55\textwidth}
   888 \begin{figure}[H]
   889 \begin{tikzpicture}[trim axis left, trim axis right]
   890 \begin{axis}[title=Molecules IND,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
   891 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
   892   west},scaled x ticks = false,x tick label style={/pgf/number
   893   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
   894   format/1000 sep = \kern 0.08em}]
   895 %\addplot+[only marks] table {proteinsOrig.txt};
   896 \addplot table {Orig/Molecules.32.txt}; \addplot[mark=triangle*,mark
   897   size=1.8pt,color=red] table {VF2PPLabel/Molecules.32.txt};
   898 \end{axis}
   899 \end{tikzpicture}
   900 \caption{In the case of molecules, the algorithms have
   901   similar behaviour, but VF2++ is almost two times faster even on such
   902   small graphs.} \label{fig:INDMolecule}
   903 \end{figure}
   904 \end{subfigure}
   905 \hspace*{1.5cm}
   906 \begin{subfigure}[b]{0.55\textwidth}
   907 \begin{figure}[H]
   908 \begin{tikzpicture}[trim axis left, trim axis right]
   909 \begin{axis}[title=Contact maps IND,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
   910 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
   911   west},scaled x ticks = false,x tick label style={/pgf/number
   912   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
   913   format/1000 sep = \kern 0.08em}]
   914 %\addplot+[only marks] table {proteinsOrig.txt};
   915 \addplot table {Orig/ContactMaps.128.txt};
   916 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
   917         {VF2PPLabel/ContactMaps.128.txt};
   918 \end{axis}
   919 \end{tikzpicture}
   920 \caption{On contact maps, VF2++ runs almost in constant time, while VF2
   921   Plus has a near-linear behaviour.} \label{fig:INDContact}
   922 \end{figure}
   923 \end{subfigure}
   924 
   925 \begin{center}
   926 \vspace*{-0.5cm}
   927 \begin{subfigure}[b]{0.55\textwidth}
   928 \begin{figure}[H]
   929 \begin{tikzpicture}[trim axis left, trim axis right]
   930   \begin{axis}[title=Proteins IND,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
   931   =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
   932     west},scaled x ticks = false,x tick label style={/pgf/number
   933   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
   934   format/1000 sep = \kern 0.08em}] %\addplot+[only marks] table
   935     {proteinsOrig.txt}; \addplot[mark=*,mark size=1.2pt,color=blue]
   936     table {Orig/Proteins.256.txt}; \addplot[mark=triangle*,mark
   937       size=1.8pt,color=red] table {VF2PPLabel/Proteins.256.txt};
   938   \end{axis}
   939   \end{tikzpicture}
   940 \caption{Both of the algorithms have linear behaviour on protein
   941   graphs. VF2++ is more than 10 times faster than VF2
   942   Plus.} \label{fig:INDProt}
   943 \end{figure}
   944 \end{subfigure}
   945 \end{center}
   946 \vspace*{-0.5cm}
   947 \caption{\normalsize{Induced subgraph isomorphism problem on biological graphs}}\label{fig:bioIND}
   948 \end{figure}
   949 
   950 
   951 \begin{figure}[H]
   952 \vspace*{-2cm}
   953 \hspace*{-1.5cm}
   954 \begin{subfigure}[b]{0.55\textwidth}
   955 \begin{figure}[H]
   956 \begin{tikzpicture}[trim axis left, trim axis right]
   957 \begin{axis}[title=Molecules ISO,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
   958 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
   959   west},scaled x ticks = false,x tick label style={/pgf/number
   960   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
   961   format/1000 sep = \kern 0.08em}]
   962 %\addplot+[only marks] table {proteinsOrig.txt};
   963 \addplot table {Orig/moleculesIso.txt}; \addplot[mark=triangle*,mark
   964   size=1.8pt,color=red] table {VF2PPLabel/moleculesIso.txt};
   965 \end{axis}
   966 \end{tikzpicture}
   967 \caption{The results are close to each other on contact maps, but VF2++ seems to be slightly faster as the number of nodes increases.
   968   }\label{fig:ISOMolecule}
   969 \end{figure}
   970 \end{subfigure}
   971 \hspace*{1.5cm}
   972 \begin{subfigure}[b]{0.55\textwidth}
   973 \begin{figure}[H]
   974 \begin{tikzpicture}[trim axis left, trim axis right]
   975 \begin{axis}[title=Contact maps ISO,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
   976 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
   977   west},scaled x ticks = false,x tick label style={/pgf/number
   978   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
   979   format/1000 sep = \kern 0.08em}]
   980 %\addplot+[only marks] table {proteinsOrig.txt};
   981 \addplot table {Orig/contactMapsIso.txt}; \addplot[mark=triangle*,mark
   982   size=1.8pt,color=red] table {VF2PPLabel/contactMapsIso.txt};
   983 \end{axis}
   984 \end{tikzpicture}
   985 \caption{In the case of molecules, there is no significant
   986   difference, but VF2++ performs consistently better.}\label{fig:ISOContact}
   987 \end{figure}
   988 \end{subfigure}
   989 
   990 \begin{center}
   991 \vspace*{-0.5cm}
   992 \begin{subfigure}[b]{0.55\textwidth}
   993 \begin{figure}[H]
   994 \begin{tikzpicture}[trim axis left, trim axis right]
   995 \begin{axis}[title=Proteins ISO,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
   996 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
   997   west},scaled x ticks = false,x tick label style={/pgf/number
   998   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
   999   format/1000 sep = \kern 0.08em}]
  1000 %\addplot+[only marks] table {proteinsOrig.txt};
  1001 \addplot table {Orig/proteinsIso.txt}; \addplot[mark=triangle*,mark
  1002   size=1.8pt,color=red] table {VF2PPLabel/proteinsIso.txt};
  1003 \end{axis}
  1004 \end{tikzpicture}
  1005 \caption{On protein graphs, VF2~Plus has a super linear time
  1006   complexity, while VF2++ runs in near constant time. The difference
  1007   is about two orders of magnitude on large graphs.}\label{fig:ISOProt}
  1008 \end{figure}
  1009 \end{subfigure}
  1010 \end{center}
  1011 \vspace*{-0.6cm}
  1012 \caption{\normalsize{Graph isomorphism problem on biological graphs}}\label{fig:bioISO}
  1013 \end{figure}
  1014 
  1015 
  1016 
  1017 
  1018 \subsection{Random graphs}
  1019 This section compares VF2++ with VF2~Plus on random graphs of large
  1020 size. The node labels are uniformly distributed.  Let $\delta$ denote
  1021 the average degree.  For the parameters of problems solved in the
  1022 experiments, please see the top of each chart.
  1023 \subsubsection{Graph isomorphism problem}
  1024 To evaluate the efficiency of the algorithms in the case of graph
  1025 isomorphism problem, random connected graphs of less than 20 000 nodes have been
  1026 considered. Generating a random graph and shuffling its nodes, an
  1027 isomorphism had to be found. Figure~\ref{fig:randISO} shows the runtime results
  1028 on graph sets of various density.
  1029 
  1030 
  1031 
  1032 
  1033 \begin{figure}
  1034 \vspace*{-1.5cm}
  1035 \hspace*{-1.5cm}
  1036 \begin{subfigure}[b]{0.55\textwidth}
  1037 \begin{center}
  1038 \begin{tikzpicture}
  1039 \begin{axis}[title={Random ISO, $\delta = 5$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
  1040 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
  1041   west},scaled x ticks = false,x tick label style={/pgf/number
  1042   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
  1043   format/1000 sep = \kern 0.08em}]
  1044 %\addplot+[only marks] table {proteinsOrig.txt};
  1045 \addplot table {randGraph/iso/vf2pIso5_1.txt};
  1046 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
  1047         {randGraph/iso/vf2ppIso5_1.txt};
  1048 \end{axis}
  1049 \end{tikzpicture}
  1050 \end{center}
  1051 \end{subfigure}
  1052 %\hspace{1cm}
  1053 \begin{subfigure}[b]{0.55\textwidth}
  1054 \begin{center}
  1055 \begin{tikzpicture}
  1056 \begin{axis}[title={Random ISO, $\delta = 10$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
  1057 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
  1058   west},scaled x ticks = false,x tick label style={/pgf/number
  1059   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
  1060   format/1000 sep = \kern 0.08em}]
  1061 %\addplot+[only marks] table {proteinsOrig.txt};
  1062 \addplot table {randGraph/iso/vf2pIso10_1.txt};
  1063 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
  1064         {randGraph/iso/vf2ppIso10_1.txt};
  1065 \end{axis}
  1066 \end{tikzpicture}
  1067 \end{center}
  1068 \end{subfigure}
  1069 %%\hspace{1cm}
  1070 \hspace*{-1.5cm}
  1071 \begin{subfigure}[b]{0.55\textwidth}
  1072 \begin{center}
  1073 \begin{tikzpicture}
  1074 \begin{axis}[title={Random ISO, $\delta = 15$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
  1075 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
  1076   west},scaled x ticks = false,x tick label style={/pgf/number
  1077   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
  1078   format/1000 sep = \kern 0.08em}]
  1079 %\addplot+[only marks] table {proteinsOrig.txt};
  1080 \addplot table {randGraph/iso/vf2pIso15_1.txt};
  1081 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
  1082         {randGraph/iso/vf2ppIso15_1.txt};
  1083 \end{axis}
  1084 \end{tikzpicture}
  1085 \end{center}
  1086      \end{subfigure}
  1087      \begin{subfigure}[b]{0.55\textwidth}
  1088 \begin{center}
  1089 \begin{tikzpicture}
  1090 \begin{axis}[title={Random ISO, $\delta = 100$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
  1091 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
  1092   west},scaled x ticks = false,x tick label style={/pgf/number
  1093   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
  1094   format/1000 sep = \kern 0.08em}]
  1095 %\addplot+[only marks] table {proteinsOrig.txt};
  1096 \addplot table {randGraph/iso/vf2pIso100_1.txt};
  1097 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
  1098         {randGraph/iso/vf2ppIso100_1.txt};
  1099 \end{axis}
  1100 \end{tikzpicture}
  1101 \end{center}
  1102 \end{subfigure}
  1103 \vspace*{-0.8cm}
  1104 \caption{Graph isomorphism problem on random graphs
  1105 }\label{fig:randISO}
  1106 \end{figure}
  1107 
  1108 
  1109 \subsubsection{Induced subgraph isomorphism problem}
  1110 This section presents a comparison of VF2++ and VF2~Plus in the case
  1111 of induced subgraph isomorphism problem. In addition to the size of graph $G_2$, that of $G_1$ dramatically influences the hardness of
  1112 a given problem too, so the overall picture is provided by examining graphs to be embedded of various size.
  1113 
  1114 For each chart, a number $0<\rho< 1$ has been fixed, and the following
  1115 has been executed 150 times. Generating a large graph $G_{2}$ of an average degree of $\delta$,
  1116 choose 10 of its induced subgraphs having $\rho|V_{2}|$ nodes,
  1117 and for all the 10 subgraphs find a mapping by using both graph
  1118 matching algorithms.  The $\delta = 5, 10, 35$ and $\rho = 0.05, 0.1,
  1119 0.3, 0.8$ cases have been examined, see
  1120 Figure~\ref{fig:randIND5},~\ref{fig:randIND10}~and~\ref{fig:randIND35}.
  1121 
  1122 
  1123 
  1124 
  1125 
  1126 \begin{figure}
  1127 \vspace*{-1.5cm}
  1128 \hspace*{-1.5cm}
  1129 \begin{subfigure}[b]{0.55\textwidth}
  1130 \begin{center}
  1131 \begin{tikzpicture}
  1132 \begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
  1133 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
  1134   west},scaled x ticks = false,x tick label style={/pgf/number
  1135   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
  1136   format/1000 sep = \kern 0.08em}]
  1137 %\addplot+[only marks] table {proteinsOrig.txt};
  1138 \addplot table {randGraph/ind/vf2pInd5_0.05.txt};
  1139 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
  1140         {randGraph/ind/vf2ppInd5_0.05.txt};
  1141 \end{axis}
  1142 \end{tikzpicture}
  1143 \end{center}
  1144      \end{subfigure}
  1145      \begin{subfigure}[b]{0.55\textwidth}
  1146 \begin{center}
  1147 \begin{tikzpicture}
  1148 \begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
  1149 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
  1150   west},scaled x ticks = false,x tick label style={/pgf/number
  1151   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
  1152   format/1000 sep = \kern 0.08em}]
  1153 %\addplot+[only marks] table {proteinsOrig.txt};
  1154 \addplot table {randGraph/ind/vf2pInd5_0.1.txt};
  1155 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
  1156         {randGraph/ind/vf2ppInd5_0.1.txt};
  1157 \end{axis}
  1158 \end{tikzpicture}
  1159 \end{center}
  1160 \end{subfigure}
  1161 \hspace*{-1.5cm}
  1162 \begin{subfigure}[b]{0.55\textwidth}
  1163 \begin{center}
  1164 \begin{tikzpicture}
  1165 \begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
  1166 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
  1167   west},scaled x ticks = false,x tick label style={/pgf/number
  1168   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
  1169   format/1000 sep = \kern 0.08em}]
  1170 %\addplot+[only marks] table {proteinsOrig.txt};
  1171 \addplot table {randGraph/ind/vf2pInd5_0.3.txt};
  1172 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
  1173         {randGraph/ind/vf2ppInd5_0.3.txt};
  1174 \end{axis}
  1175 \end{tikzpicture}
  1176 \end{center}
  1177      \end{subfigure}
  1178      \begin{subfigure}[b]{0.55\textwidth}
  1179 \begin{center}
  1180 \begin{tikzpicture}
  1181 \begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
  1182 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
  1183   west},scaled x ticks = false,x tick label style={/pgf/number
  1184   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
  1185   format/1000 sep = \kern 0.08em}]
  1186 %\addplot+[only marks] table {proteinsOrig.txt};
  1187 \addplot table {randGraph/ind/vf2pInd5_0.8.txt};
  1188 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
  1189         {randGraph/ind/vf2ppInd5_0.8.txt};
  1190 \end{axis}
  1191 \end{tikzpicture}
  1192 \end{center}
  1193 \end{subfigure}
  1194 \vspace*{-0.8cm}
  1195 \caption{Induced subgraph isomorphism problem on random graphs having an average degree of
  1196   5}\label{fig:randIND5}
  1197 \end{figure}
  1198 
  1199 
  1200 \begin{figure}
  1201 \vspace*{-1.5cm}
  1202 \hspace*{-1.5cm}
  1203 \begin{subfigure}[b]{0.55\textwidth}
  1204 \begin{center}
  1205 \hspace*{-0.5cm}
  1206 \begin{tikzpicture}
  1207 \begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
  1208 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
  1209   west},scaled x ticks = false,x tick label style={/pgf/number
  1210   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
  1211   format/1000 sep = \kern 0.08em}]
  1212 %\addplot+[only marks] table {proteinsOrig.txt};
  1213 \addplot table {randGraph/ind/vf2pInd10_0.05.txt};
  1214 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
  1215         {randGraph/ind/vf2ppInd10_0.05.txt};
  1216 \end{axis}
  1217 \end{tikzpicture}
  1218 \end{center}
  1219      \end{subfigure}
  1220      \begin{subfigure}[b]{0.55\textwidth}
  1221 \begin{center}
  1222      \hspace*{-0.5cm}
  1223 \begin{tikzpicture}
  1224 \begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
  1225 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
  1226   west},scaled x ticks = false,x tick label style={/pgf/number
  1227   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
  1228   format/1000 sep = \kern 0.08em}]
  1229 %\addplot+[only marks] table {proteinsOrig.txt};
  1230 \addplot table {randGraph/ind/vf2pInd10_0.1.txt};
  1231 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
  1232         {randGraph/ind/vf2ppInd10_0.1.txt};
  1233 \end{axis}
  1234 \end{tikzpicture}
  1235 \end{center}
  1236 \end{subfigure}
  1237 \hspace*{-1.5cm}
  1238 \begin{subfigure}[b]{0.55\textwidth}
  1239 \begin{center}
  1240 \begin{tikzpicture}
  1241 \begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
  1242 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
  1243   west},scaled x ticks = false,x tick label style={/pgf/number
  1244   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
  1245   format/1000 sep = \kern 0.08em}]
  1246 %\addplot+[only marks] table {proteinsOrig.txt};
  1247 \addplot table {randGraph/ind/vf2pInd10_0.3.txt};
  1248 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
  1249         {randGraph/ind/vf2ppInd10_0.3.txt};
  1250 \end{axis}
  1251 \end{tikzpicture}
  1252 \end{center}
  1253      \end{subfigure}
  1254      \begin{subfigure}[b]{0.55\textwidth}
  1255 \begin{center}
  1256 \begin{tikzpicture}
  1257 \begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
  1258 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
  1259   west},scaled x ticks = false,x tick label style={/pgf/number
  1260   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
  1261   format/1000 sep = \kern 0.08em}]
  1262 %\addplot+[only marks] table {proteinsOrig.txt};
  1263 \addplot table {randGraph/ind/vf2pInd10_0.8.txt};
  1264 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
  1265         {randGraph/ind/vf2ppInd10_0.8.txt};
  1266 \end{axis}
  1267 \end{tikzpicture}
  1268 \end{center}
  1269 \end{subfigure}
  1270 \vspace*{-0.8cm}
  1271 \caption{Induced subgraph isomorphism problem on random graphs having an average degree of
  1272   10}\label{fig:randIND10}
  1273 \end{figure}
  1274 
  1275 
  1276 
  1277 \begin{figure}
  1278 \vspace*{-1.5cm}
  1279 \hspace*{-1.5cm}
  1280 \begin{subfigure}[b]{0.55\textwidth}
  1281 \begin{center}
  1282 \begin{tikzpicture}
  1283 \begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
  1284 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
  1285   west},scaled x ticks = false,x tick label style={/pgf/number
  1286   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
  1287   format/1000 sep = \kern 0.08em}]
  1288 %\addplot+[only marks] table {proteinsOrig.txt};
  1289 \addplot table {randGraph/ind/vf2pInd35_0.05.txt};
  1290 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
  1291         {randGraph/ind/vf2ppInd35_0.05.txt};
  1292 \end{axis}
  1293 \end{tikzpicture}
  1294 \end{center}
  1295      \end{subfigure}
  1296      \begin{subfigure}[b]{0.55\textwidth}
  1297 \begin{center}
  1298 \begin{tikzpicture}
  1299 \begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
  1300 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
  1301   west},scaled x ticks = false,x tick label style={/pgf/number
  1302   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
  1303   format/1000 sep = \kern 0.08em}]
  1304 %\addplot+[only marks] table {proteinsOrig.txt};
  1305 \addplot table {randGraph/ind/vf2pInd35_0.1.txt};
  1306 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
  1307         {randGraph/ind/vf2ppInd35_0.1.txt};
  1308 \end{axis}
  1309 \end{tikzpicture}
  1310 \end{center}
  1311 \end{subfigure}
  1312 \hspace*{-1.5cm}
  1313 \begin{subfigure}[b]{0.55\textwidth}
  1314 \begin{center}
  1315 \begin{tikzpicture}
  1316 \begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
  1317 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
  1318   west},scaled x ticks = false,x tick label style={/pgf/number
  1319   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
  1320   format/1000 sep = \kern 0.08em}]
  1321 %\addplot+[only marks] table {proteinsOrig.txt};
  1322 \addplot table {randGraph/ind/vf2pInd35_0.3.txt};
  1323 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
  1324         {randGraph/ind/vf2ppInd35_0.3.txt};
  1325 \end{axis}
  1326 \end{tikzpicture}
  1327 \end{center}
  1328      \end{subfigure}
  1329      \begin{subfigure}[b]{0.55\textwidth}
  1330 \begin{center}
  1331 \begin{tikzpicture}
  1332 \begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
  1333 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
  1334   west},scaled x ticks = false,x tick label style={/pgf/number
  1335   format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
  1336   format/1000 sep = \kern 0.08em}]
  1337 %\addplot+[only marks] table {proteinsOrig.txt};
  1338 \addplot table {randGraph/ind/vf2pInd35_0.8.txt};
  1339 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
  1340         {randGraph/ind/vf2ppInd35_0.8.txt};
  1341 \end{axis}
  1342 \end{tikzpicture}
  1343 \end{center}
  1344 \end{subfigure}
  1345 \vspace*{-0.8cm}
  1346 \caption{Induced subgraph isomorphism problem on random graphs having an average degree of
  1347   35}\label{fig:randIND35}
  1348 \end{figure}
  1349 
  1350 
  1351 Based on these experiments, VF2++ is faster than VF2~Plus and able to
  1352 handle really large graphs in milliseconds. Note that when $IND$ was
  1353 considered and the graph to be embedded had proportionally few nodes ($\rho =
  1354 0.05$, or $\rho = 0.1$), then VF2~Plus produced some inefficient node
  1355 orders (e.g. see the $\delta=10$ case on
  1356 Figure~\ref{fig:randIND10}). If these instances had been excluded, the
  1357 charts would have looked similarly to the other ones.
  1358 Unsurprisingly, as denser graphs are considered, both VF2++ and VF2
  1359 Plus slow slightly down, but remain practically usable even on graphs
  1360 having 10 000 nodes.
  1361 
  1362 
  1363 
  1364 
  1365 
  1366 \section{Conclusion}
  1367 This paper presented VF2++, a new graph matching algorithm based on VF2, and analysed it from a practical viewpoint.
  1368 
  1369 Recognizing the importance of the node order and determining an
  1370 efficient one, VF2++ is able to match graphs of thousands of nodes in
  1371 near practically linear time including preprocessing. In addition to
  1372 the proper order, VF2++ uses more efficient cutting
  1373 rules which are easy to compute and make the algorithm able to prune
  1374 most of the unfruitful branches without going astray.
  1375 
  1376 In order to show the efficiency of the new method, it has been
  1377 compared to VF2~Plus~\cite{VF2Plus}, which is the best contemporary algorithm.
  1378 
  1379 The experiments show that VF2++ consistently outperforms VF2~Plus on
  1380 biological graphs. It seems to be asymptotically faster on protein and
  1381 on contact map graphs in the case of induced subgraph isomorphism problem,
  1382 while in the case of graph isomorphism problem, it has definitely better
  1383 asymptotic behaviour on protein graphs.
  1384 
  1385 Regarding random sparse graphs, not only has VF2++ proved itself to be
  1386 faster than VF2~Plus, but it also has a practically linear behaviour both
  1387 in the case of induced subgraph and graph isomorphism problems.
  1388 
  1389 %%%%%%%%%%%%%%%%
  1390 \section*{Acknowledgement} \label{sec:ack}
  1391 %%%%%%%%%%%%%%%%
  1392 This research project was initiated and sponsored by QuantumBio
  1393 Inc.~\cite{QUANTUMBIO}.
  1394 
  1395 The authors were supported by the Hungarian Scientific Research Fund -
  1396 OTKA, K109240 and by the J\'anos Bolyai Research Fellowship program of
  1397 the Hungarian Academy of Sciences.
  1398 
  1399 
  1400 %% The Appendices part is started with the command \appendix;
  1401 %% appendix sections are then done as normal sections
  1402 %% \appendix
  1403 
  1404 %% \section{}
  1405 %% \label{}
  1406 
  1407 %% If you have bibdatabase file and want bibtex to generate the
  1408 %% bibitems, please use
  1409 %%
  1410 \bibliographystyle{elsarticle-num} \bibliography{bibliography}
  1411 
  1412 %% else use the following coding to input the bibitems directly in the
  1413 %% TeX file.
  1414 
  1415 %% \begin{thebibliography}{00}
  1416 
  1417 %% %% \bibitem{label}
  1418 %% %% Text of bibliographic item
  1419 
  1420 %% \bibitem{}
  1421 
  1422 %% \end{thebibliography}
  1423 
  1424 \end{document}
  1425 \endinput
  1426 %%
  1427 %% End of file `elsarticle-template-num.tex'.