lemon-vf2: damecco.tex@0ff72a828b16 (annotated)

alpar@0	1	%%
alpar@0	2	%% Copyright 2007, 2008, 2009 Elsevier Ltd
alpar@0	3	%%
alpar@0	4	%% This file is part of the 'Elsarticle Bundle'.
alpar@0	5	%% ---------------------------------------------
alpar@0	6	%%
alpar@0	7	%% It may be distributed under the conditions of the LaTeX Project Public
alpar@0	8	%% License, either version 1.2 of this license or (at your option) any
alpar@0	9	%% later version. The latest version of this license is in
alpar@0	10	%% http://www.latex-project.org/lppl.txt
alpar@0	11	%% and version 1.2 or later is part of all distributions of LaTeX
alpar@0	12	%% version 1999/12/01 or later.
alpar@0	13	%%
alpar@0	14	%% The list of all files belonging to the 'Elsarticle Bundle' is
alpar@0	15	%% given in the file `manifest.txt'.
alpar@0	16	%%
alpar@0	17
alpar@0	18	%% Template article for Elsevier's document class `elsarticle'
alpar@0	19	%% with numbered style bibliographic references
alpar@0	20	%% SP 2008/03/01
alpar@0	21
alpar@0	22	\documentclass[preprint,12pt]{elsarticle}
alpar@0	23
alpar@0	24	%% Use the option review to obtain double line spacing
alpar@0	25	%% \documentclass[authoryear,preprint,review,12pt]{elsarticle}
alpar@0	26
alpar@0	27	%% Use the options 1p,twocolumn; 3p; 3p,twocolumn; 5p; or 5p,twocolumn
alpar@0	28	%% for a journal layout:
alpar@0	29	%% \documentclass[final,1p,times]{elsarticle}
alpar@0	30	%% \documentclass[final,1p,times,twocolumn]{elsarticle}
alpar@0	31	%% \documentclass[final,3p,times]{elsarticle}
alpar@0	32	%% \documentclass[final,3p,times,twocolumn]{elsarticle}
alpar@0	33	%% \documentclass[final,5p,times]{elsarticle}
alpar@0	34	%% \documentclass[final,5p,times,twocolumn]{elsarticle}
alpar@0	35
alpar@0	36	%% For including figures, graphicx.sty has been loaded in
alpar@0	37	%% elsarticle.cls. If you prefer to use the old commands
alpar@0	38	%% please give \usepackage{epsfig}
alpar@0	39
alpar@0	40	%% The amssymb package provides various useful mathematical symbols
alpar@0	41	\usepackage{amssymb}
alpar@0	42	%% The amsthm package provides extended theorem environments
alpar@0	43	%% \usepackage{amsthm}
alpar@0	44
alpar@0	45	%% The lineno packages adds line numbers. Start line numbering with
alpar@0	46	%% \begin{linenumbers}, end it with \end{linenumbers}. Or switch it on
alpar@0	47	%% for the whole article with \linenumbers.
alpar@0	48	%% \usepackage{lineno}
alpar@0	49
alpar@2	50	\usepackage{amsmath}
alpar@2	51	%% \usepackage[pdftex]{graphicx}
alpar@2	52
alpar@2	53	\usepackage{pgfplots}
alpar@2	54	\pgfplotsset{width=9cm}
alpar@2	55	\pgfplotsset{compat=1.8}
alpar@2	56
alpar@2	57	\usepackage{caption}
alpar@2	58	\usepackage{subcaption}
alpar@2	59
alpar@2	60	\usepackage{algorithm}
alpar@2	61	\usepackage{algpseudocode}
alpar@2	62	\usepackage{tikz}
alpar@2	63
alpar@2	64	\usepackage{amsthm,amssymb}
alpar@2	65	\renewcommand{\qedsymbol}{\rule{0.7em}{0.7em}}
alpar@2	66
alpar@2	67	\newtheorem{theorem}{Theorem}[subsection]
alpar@2	68	\newtheorem{corollary}{Corollary}[theorem]
alpar@2	69	\newtheorem{claim}[theorem]{Claim}
alpar@2	70
alpar@2	71	\newtheorem{definition}{Definition}[subsection]
alpar@2	72	\newtheorem{notation}{Notation}[subsection]
alpar@2	73	\newtheorem{example}{Example}[subsection]
alpar@2	74	\usetikzlibrary{decorations.markings}
alpar@2	75	\let\oldproofname=\proofname
alpar@2	76	%% \renewcommand{\proofname}{\rm\bf{Proof:}}
alpar@2	77
Madarasi@7	78	\captionsetup{font=normalsize}
Madarasi@7	79
alpar@1	80	\journal{Discrete Applied Mathematics}
alpar@0	81
alpar@0	82	\begin{document}
alpar@0	83
alpar@0	84	\begin{frontmatter}
alpar@0	85
alpar@0	86	%% Title, authors and addresses
alpar@0	87
alpar@0	88	%% use the tnoteref command within \title for footnotes;
alpar@0	89	%% use the tnotetext command for theassociated footnote;
alpar@0	90	%% use the fnref command within \author or \address for footnotes;
alpar@0	91	%% use the fntext command for theassociated footnote;
alpar@0	92	%% use the corref command within \author for corresponding author footnotes;
alpar@0	93	%% use the cortext command for theassociated footnote;
alpar@0	94	%% use the ead command for the email address,
alpar@0	95	%% and the form \ead[url] for the home page:
alpar@0	96	%% \title{Title\tnoteref{label1}}
alpar@0	97	%% \tnotetext[label1]{}
alpar@0	98	%% \author{Name\corref{cor1}\fnref{label2}}
alpar@0	99	%% \ead{email address}
alpar@0	100	%% \ead[url]{home page}
alpar@0	101	%% \fntext[label2]{}
alpar@0	102	%% \cortext[cor1]{}
alpar@0	103	%% \address{Address\fnref{label3}}
alpar@0	104	%% \fntext[label3]{}
alpar@0	105
alpar@24	106	\title{VF2++ --- An Improved Subgraph Isomorphism Algorithm}
alpar@0	107
alpar@0	108	%% use optional labels to link authors explicitly to addresses:
alpar@0	109	%% \author[label1,label2]{}
alpar@0	110	%% \address[label1]{}
alpar@0	111	%% \address[label2]{}
alpar@0	112
alpar@24	113	\author[egres,elte]{Alp{\'a}r J{\"u}ttner}
alpar@24	114	\ead{alpar@cs.elte.hu}
alpar@24	115	\author[elte]{P{\'e}ter Madarasi}
alpar@24	116	\ead{madarasip@caesar.elte.hu}
alpar@24	117	\address[egres]{MTA-ELTE Egerv{\'a}ry Research Group, Budapest, Hungary.}
alpar@24	118	\address[elte]{Department of Operations Research, E{\"o}tv{\"o}s Lor{\'a}nd University, Budapest, Hungary.}
alpar@0	119
alpar@0	120	\begin{abstract}
alpar@0	121
alpar@24	122	This paper presents a largely improved version of the VF2 algorithm
alpar@24	123	for the \emph{Subgraph Isomorphism Problem}. The improvements are
alpar@24	124	twofold. Firstly, it is based on a new approach for determining the
alpar@29	125	matching order of the nodes, and secondly, more efficient ---
alpar@29	126	nevertheless easier to compute --- cutting rules are proposed. They
alpar@29	127	together reduce the search space significantly.
alpar@1	128
Madarasi@28	129	In addition to the usual \emph{Subgraph Isomorphism Problem}, the paper also
Madarasi@28	130	presents specialized algorithms for the \emph{Induced Subgraph
alpar@24	131	Isomorphism} and for the \emph{Graph Isomorphism Problems}.
alpar@1	132
alpar@24	133	Finally, an extensive experimental evaluation is provided using a
Madarasi@28	134	wide range of inputs, including both real-life biological and
alpar@24	135	chemical datasets and standard randomly generated graph series. The
alpar@24	136	results show major and consistent running time improvements over the
alpar@24	137	other known methods.
alpar@24	138
Madarasi@28	139	The C++ implementations of the algorithms are available open-source as
Madarasi@28	140	part of the LEMON graph and network optimization library.
alpar@24	141
alpar@0	142	\end{abstract}
alpar@0	143
alpar@0	144	\begin{keyword}
alpar@24	145	Computational Biology, Subgraph Isomorphism Problem
alpar@0	146	%% keywords here, in the form: keyword \sep keyword
alpar@0	147
alpar@0	148	%% PACS codes here, in the form: \PACS code \sep code
alpar@0	149
alpar@0	150	%% MSC codes here, in the form: \MSC code \sep code
alpar@0	151	%% or \MSC[2008] code \sep code (2000 is the default)
alpar@0	152
alpar@0	153	\end{keyword}
alpar@0	154
alpar@0	155	\end{frontmatter}
alpar@0	156
alpar@0	157	%% \linenumbers
alpar@0	158
alpar@0	159	%% main text
alpar@2	160	\section{Introduction}
alpar@2	161	\label{sec:intro}
alpar@2	162
alpar@3	163	In the last decades, combinatorial structures, and especially graphs
alpar@3	164	have been considered with ever increasing interest, and applied to the
alpar@29	165	solution of several new and revised questions. The expressiveness,
alpar@29	166	the simplicity and the deep theoretical background
alpar@29	167	of graphs make it one of the most useful
alpar@29	168	modeling tool and appears constantly in several seemingly independent
Madarasi@19	169	fields, such as bioinformatics and chemistry.
alpar@2	170
alpar@3	171	Complex biological systems arise from the interaction and cooperation
alpar@29	172	of plenty of molecular components. Getting acquainted with the
alpar@29	173	structure of such systems at the molecular level is of primary
alpar@29	174	importance, since protein-protein interaction, DNA-protein
alpar@29	175	interaction, metabolic interaction, transcription factor binding,
alpar@29	176	neuronal networks, and hormone singling networks can be understood
alpar@29	177	this way.
alpar@2	178
alpar@29	179	Many chemical and biological structures can easily be modeled
Madarasi@19	180	as graphs, for instance, a molecular structure can be
Madarasi@19	181	considered as a graph, whose nodes correspond to atoms and whose
Madarasi@19	182	edges to chemical bonds. The similarity and dissimilarity of
Madarasi@19	183	objects corresponding to nodes are incorporated to the model
Madarasi@19	184	by \emph{node labels}. Understanding such networks basically
Madarasi@26	185	requires finding specific subgraphs, thus it calls for efficient
Madarasi@19	186	graph matching algorithms.
alpar@2	187
Madarasi@19	188	Other real-world fields related to some
Madarasi@19	189	variants of graph matching include pattern recognition
Madarasi@28	190	and machine vision~\cite{HorstBunkeApplications}, symbol recognition~\cite{CordellaVentoSymbolRecognition}, and face identification~\cite{JianzhuangYongFaceIdentification}. \\
alpar@2	191
alpar@3	192	Subgraph and induced subgraph matching problems are known to be
Madarasi@28	193	NP-Complete~\cite{SubgraphNPC}, while the graph isomorphism problem is
alpar@3	194	one of the few problems in NP neither known to be in P nor
Madarasi@28	195	NP-Complete. Although polynomial-time isomorphism algorithms are known
alpar@3	196	for various graph classes, like trees and planar
Madarasi@28	197	graphs~\cite{PlanarGraphIso}, bounded valence
Madarasi@28	198	graphs~\cite{BondedDegGraphIso}, interval graphs~\cite{IntervalGraphIso}
alpar@29	199	or permutation graphs~\cite{PermGraphIso}. Furthermore, an FPT algorithm has also been presented for the colored hypergraph isomorphism problem in~\cite{ColoredHiperGraphIso}.
alpar@2	200
alpar@29	201	In the following, some algorithms which do not need any restrictions on the graphs are
alpar@29	202	summarized. Even though,
alpar@29	203	an overall polynomial behavior is not expectable from such an
alpar@29	204	alternative, they may often have good practical performance, in fact,
alpar@29	205	they might be the best choice in practice even on a graph class for which polynomial
Madarasi@19	206	algorithm is known.
alpar@2	207
alpar@3	208	The first practically usable approach was due to
Madarasi@28	209	\emph{Ullmann}~\cite{Ullmann}, which is a commonly used algorithm based on depth-first
Madarasi@28	210	search with a complex heuristic for reducing the
alpar@3	211	number of visited states. A major problem is its $\Theta(n^3)$ space
alpar@3	212	complexity, which makes it impractical in the case of big sparse
alpar@3	213	graphs.
alpar@2	214
Madarasi@28	215	In a recent paper, Ullmann~\cite{UllmannBit} presents an
alpar@3	216	improved version of this algorithm based on a bit-vector solution for
alpar@3	217	the binary Constraint Satisfaction Problem.
alpar@2	218
Madarasi@28	219	The \emph{Nauty} algorithm~\cite{Nauty} transforms the two graphs to
alpar@29	220	a canonical form before starting to look for an isomorphism. It has
alpar@3	221	been considered as one of the fastest graph isomorphism algorithms,
alpar@3	222	although graph categories were shown in which it takes exponentially
alpar@3	223	many steps. This algorithm handles only the graph isomorphism problem.
alpar@2	224
Madarasi@28	225	The \emph{LAD} algorithm~\cite{Lad} uses a depth-first search
alpar@3	226	strategy and formulates the matching as a Constraint Satisfaction
alpar@3	227	Problem to prune the search tree. The constraints are that the mapping
alpar@29	228	has to be invective and edge-preserving, hence it is possible to
alpar@3	229	handle new matching types as well.
alpar@2	230
Madarasi@28	231	The \emph{RI} algorithm~\cite{RI} and its variations are based on a
alpar@3	232	state space representation. After reordering the nodes of the graphs,
alpar@3	233	it uses some fast executable heuristic checks without using any
alpar@3	234	complex pruning rules. It seems to run really efficiently on graphs
alpar@3	235	coming from biology, and won the International Contest on Pattern
Madarasi@28	236	Search in Biological Databases~\cite{Content}.
alpar@2	237
Madarasi@28	238	Currently, the most commonly used algorithm is the
alpar@29	239	\emph{VF2}~\cite{VF2}, an improved version of \emph{VF}~\cite{VF}, which was
alpar@3	240	designed for solving pattern matching and computer vision problems,
alpar@3	241	and has been one of the best overall algorithms for more than a
Madarasi@28	242	decade. Although, it is not as fast as some of the new specialized algorithms, it is still widely used due to its simplicity and space efficiency. VF2 uses
alpar@29	243	a state space representation and checks specific conditions in each state
alpar@3	244	to prune the search tree.
alpar@2	245
Madarasi@28	246	Meanwhile, another variant called \emph{VF2~Plus}~\cite{VF2Plus} has
alpar@3	247	been published. It is considered to be as efficient as the RI
alpar@29	248	algorithm and has a strictly better behavior on large graphs. The
alpar@29	249	main idea of VF2~Plus is to precompute a heuristic node order of the graph to be embedded, on which VF2 works more efficiently.
alpar@2	250
Madarasi@19	251	This paper introduces \emph{VF2++}, a new further improved algorithm
alpar@29	252	for the graph and (induced) subgraph isomorphism problems. It is based on
Madarasi@19	253	efficient cutting rules and determines a node order in which VF2 runs
Madarasi@19	254	significantly faster on practical inputs.
Madarasi@19	255
alpar@25	256	The rest of the paper is structured as
alpar@25	257	follows. Section~\ref{sec:ProbStat} defines the exact problems to be
alpar@25	258	solved, Section~\ref{sec:VF2Alg} provides a description of VF2. Based
alpar@25	259	on that, Section~\ref{sec:VF2ppAlg} introduces VF2++. Some technical
alpar@25	260	details necessary for an efficient implementation are discussed in
alpar@29	261	Section~\ref{sec:VF2ppImpl}. Finally, Section~\ref{sec:ExpRes}
Madarasi@28	262	provides a detailed experimental evaluation of VF2++ and its comparison
alpar@27	263	to the state-of-the-art algorithm.
Madarasi@26	264
alpar@25	265	It must also be mentioned that the C++ implementations of the
alpar@25	266	algorithms have been made available for evaluation and use under an
alpar@29	267	open-source license as a part of
alpar@29	268	LEMON~\cite{o11:_lemon_open_sourc_c_graph_templ_librar} graph library.
Madarasi@22	269
Madarasi@22	270	\section{Problem Statement}\label{sec:ProbStat}
Madarasi@19	271	This section provides a formal description of the problems to be
alpar@3	272	solved.
alpar@2	273	\subsection{Definitions}
alpar@2	274
Madarasi@19	275	Throughout the paper $G_{1}=(V_{1}, E_{1})$ and
Madarasi@19	276	$G_{2}=(V_{2}, E_{2})$ denote two undirected graphs.
Madarasi@19	277
Madarasi@19	278	\begin{definition}
Madarasi@19	279	$\mathcal{L}: (V_{1}\cup V_{2}) \longrightarrow K$ is a \textbf{node
Madarasi@19	280	label function}, where K is an arbitrary set. The elements in K
Madarasi@19	281	are the \textbf{node labels}. Two nodes, u and v are said to be
Madarasi@19	282	\textbf{equivalent} if $\mathcal{L}(u)=\mathcal{L}(v)$.
Madarasi@19	283	\end{definition}
Madarasi@19	284
alpar@29	285	For the sake of simplicity, the graph, subgraph and induced subgraph
alpar@29	286	isomorphisms are defined in a more general way.
Madarasi@19	287
alpar@2	288	\begin{definition}\label{sec:ismorphic}
Madarasi@19	289	$G_{1}$ and $G_{2}$ are \textbf{isomorphic} (by the node label $\mathcal{L}$) if $\exists \mathfrak{m}:
Madarasi@19	290	V_{1} \longrightarrow V_{2}$ bijection, for which the
alpar@3	291	following is true:
alpar@2	292	\begin{center}
Madarasi@20	293	$\forall u\in{V_{1}} : \mathcal{L}(u)=\mathcal{L}(\mathfrak{m}(u))$ and\\
Madarasi@19	294	$\forall u,v\in{V_{1}} : (u,v)\in{E_{1}} \Leftrightarrow (\mathfrak{m}(u),\mathfrak{m}(v))\in{E_{2}}$
alpar@2	295	\end{center}
alpar@2	296	\end{definition}
Madarasi@19	297
alpar@2	298	\begin{definition}
Madarasi@19	299	$G_{1}$ is a \textbf{subgraph} of $G_{2}$ (by the node label $\mathcal{L}$) if $\exists \mathfrak{m}:
Madarasi@19	300	V_{1}\longrightarrow V_{2}$ injection, for which the
alpar@3	301	following is true:
alpar@2	302	\begin{center}
Madarasi@20	303	$\forall u\in{V_{1}} : \mathcal{L}(u)=\mathcal{L}(\mathfrak{m}(u))$ and\\
Madarasi@19	304	$\forall u,v \in{V_{1}} : (u,v)\in{E_{1}} \Rightarrow (\mathfrak{m}(u),\mathfrak{m}(v))\in E_{2}$
alpar@2	305	\end{center}
alpar@2	306	\end{definition}
alpar@2	307
alpar@2	308	\begin{definition}
Madarasi@19	309	$G_{1}$ is an \textbf{induced subgraph} of $G_{2}$ (by the node label $\mathcal{L}$) if $\exists
Madarasi@19	310	\mathfrak{m}: V_{1}\longrightarrow V_{2}$ injection, for which the
alpar@3	311	following is true:
alpar@2	312	\begin{center}
Madarasi@19	313	$\forall u\in{V_{1}} : \mathcal{L}(u)=\mathcal{L}(\mathfrak{m}(u))$ and
Madarasi@19	314
Madarasi@19	315	$\forall u,v \in{V_{1}} : (u,v)\in{E_{1}} \Leftrightarrow
Madarasi@19	316	(\mathfrak{m}(u),\mathfrak{m}(v))\in E_{2}$
alpar@2	317	\end{center}
alpar@2	318	\end{definition}
alpar@2	319
alpar@2	320
alpar@2	321	\subsection{Common problems}\label{sec:CommProb}
alpar@2	322
Madarasi@28	323	The focus of this paper is on the following problems appearing in many applications.
alpar@2	324
Madarasi@28	325	The \textbf{subgraph isomorphism problem} is the following: is
Madarasi@19	326	$G_{1}$ isomorphic to any subgraph of $G_{2}$ by a given node
alpar@3	327	label?
alpar@2	328
Madarasi@28	329	The \textbf{induced subgraph isomorphism problem} asks the same about the
alpar@3	330	existence of an induced subgraph.
alpar@2	331
alpar@3	332	The \textbf{graph isomorphism problem} can be defined as induced
alpar@3	333	subgraph matching problem where the sizes of the two graphs are equal.
alpar@2	334
alpar@29	335	In addition, one may either want to find a \textbf{single} embedding or \textbf{enumerate} all of them.
alpar@2	336
Madarasi@22	337	\section{The VF2 Algorithm}\label{sec:VF2Alg}
Madarasi@28	338	This algorithm is the basis of both the VF2++ and the VF2~Plus. VF2
Madarasi@28	339	is able to handle all the variations mentioned in Section~\ref{sec:CommProb}. Although it can also handle directed graphs,
alpar@29	340	for the sake of simplicity, only the undirected case is
alpar@3	341	discussed.
alpar@2	342
alpar@2	343
alpar@2	344	\subsection{Common notations}
Madarasi@19	345	\indent Assume $G_{1}$ is searched in $G_{2}$. The following
alpar@29	346	definitions and notations is used throughout this paper.
alpar@2	347	\begin{definition}
Madarasi@19	348	An injection $\mathfrak{m} : D \longrightarrow V_2$ is called (partial) \textbf{mapping}, where $D\subseteq V_1$.
Madarasi@19	349	\end{definition}
Madarasi@19	350
Madarasi@19	351	\begin{notation}
Madarasi@19	352	$\mathfrak{D}(f)$ and $\mathfrak{R}(f)$ denote the domain and the range of a function $f$, respectively.
Madarasi@19	353	\end{notation}
Madarasi@19	354
Madarasi@19	355	\begin{definition}
Madarasi@19	356	Mapping $\mathfrak{m}$ \textbf{covers} a node $u\in V_1\cup V_2$ if $u\in \mathfrak{D}(\mathfrak{m})\cup \mathfrak{R}(\mathfrak{m})$.
alpar@2	357	\end{definition}
alpar@2	358
alpar@2	359	\begin{definition}
Madarasi@19	360	A mapping $\mathfrak{m}$ is $\mathbf{whole\ mapping}$ if $\mathfrak{m}$ covers all the
Madarasi@19	361	nodes of $V_{1}$, i.e. $\mathfrak{D}(\mathfrak{m})=V_1$.
alpar@2	362	\end{definition}
alpar@2	363
alpar@2	364	\begin{definition}
Madarasi@28	365	Let \textbf{extend}$(\mathfrak{m},(u,v))$ denote the function $f : \mathfrak{D}(\mathfrak{m})\cup\{u\}\longrightarrow\mathfrak{R}(\mathfrak{m})\cup\{v\}$, for which $\forall w\in \mathfrak{D}(\mathfrak{m}) : f(w)=\mathfrak{m}(w)$ and $f(u)=v$ holds, where $u\in V_1\setminus\mathfrak{D}(\mathfrak{m})$ and $v\in V_2\setminus\mathfrak{R}(\mathfrak{m})$; otherwise $extend(\mathfrak{m},(u,v))$ is undefined.
alpar@2	366	\end{definition}
alpar@2	367
alpar@2	368	\begin{notation}
Madarasi@19	369	Throughout the paper, $\mathbf{PT}$ denotes a generic problem type
Madarasi@28	370	which can be substituted by any of the $\mathbf{SUB}$, $\mathbf{IND}$
alpar@29	371	and $\mathbf{ISO}$ problems, which stand for the problems mentioned in Section~\ref{sec:CommProb} respectively.
alpar@2	372	\end{notation}
alpar@2	373
alpar@2	374	\begin{definition}
Madarasi@28	375	Let $\mathfrak{m}$ be a mapping. The \textbf{consistency function for } $\mathbf{PT}$ is a logical function, for which $\mathbf{Cons_{PT}}(\mathfrak{m})$ is true if and only if $\mathfrak{m}$ satisfies the requirements of $\mathbf{PT}$ considering the subgraphs of $G_{1}$ and $G_{2}$ induced by $\mathfrak{D}(\mathfrak{m})$ and $\mathfrak{R}(\mathfrak{m})$, respectively.
Madarasi@28	376
Madarasi@28	377	%$\mathbf{Cons_{PT}}(\mathfrak{m})$ is true if there exists a whole mapping $w$ satisfying the requirements of $PT$, for which $\mathfrak{m}$ is exactly $w$ restricted to $\mathfrak{D}(\mathfrak{m})$.
alpar@2	378	\end{definition}
alpar@2	379
alpar@2	380	\begin{definition}
Madarasi@19	381	Let $\mathfrak{m}$ be a mapping. A logical function $\mathbf{Cut_{PT}}$ is a
Madarasi@28	382	\textbf{cutting function for } $\mathbf{PT}$ if the following
Madarasi@19	383	holds. $\mathbf{Cut_{PT}(\mathfrak{m})}$ is false if there exists a sequence of extend operations, which results in a whole mapping satisfying the requirements of $PT$.
alpar@2	384	\end{definition}
alpar@2	385
alpar@2	386	\begin{definition}
Madarasi@28	387	A mapping $\mathfrak{m}$ is said to be \textbf{consistent mapping by} $\mathbf{PT}$ if
Madarasi@19	388	$Cons_{PT}(\mathfrak{m})$ is true.
alpar@2	389	\end{definition}
alpar@2	390
alpar@2	391	$Cons_{PT}$ and $Cut_{PT}$ will often be used in the following form.
alpar@2	392	\begin{notation}
Madarasi@19	393	Let $\mathbf{Cons_{PT}(p, \mathfrak{m})}:=Cons_{PT}(extend(\mathfrak{m},p))$, and
Madarasi@19	394	$\mathbf{Cut_{PT}(p, \mathfrak{m})}:=Cut_{PT}(extend(\mathfrak{m},p))$, where
Madarasi@19	395	$p\in{V_{1}\backslash\mathfrak{D}(\mathfrak{m}) \!\times\!V_{2}\backslash\mathfrak{R}(\mathfrak{m})}$.
alpar@2	396	\end{notation}
alpar@2	397
alpar@3	398	$Cons_{PT}$ will be used to check the consistency of the already
alpar@3	399	covered nodes, while $Cut_{PT}$ is for looking ahead to recognize if
alpar@3	400	no whole consistent mapping can contain the current mapping.
alpar@2	401
alpar@2	402	\subsection{Overview of the algorithm}
alpar@2	403
Madarasi@28	404	VF2 begins with an empty mapping and gradually extends it with respect to the consistency and cutting functions until a whole mapping is reached.
Madarasi@28	405
Madarasi@28	406	Algorithm~\ref{alg:VF2Pseu} is a high-level description of
Madarasi@28	407	the VF2 algorithm. Each state of the matching process can
Madarasi@19	408	be associated with a mapping $\mathfrak{m}$. The initial state
Madarasi@19	409	is associated with a mapping $\mathfrak{m}$, for which
Madarasi@19	410	$\mathfrak{D}(\mathfrak{m})=\emptyset$, i.e. it starts with an empty mapping.
alpar@2	411
alpar@2	412
alpar@2	413	\begin{algorithm}
Madarasi@13	414	\algtext*{EndIf}%ne nyomtasson end if-et
Madarasi@13	415	\algtext*{EndFor}%ne
Madarasi@13	416	\algtext*{EndProcedure}%ne nyomtasson ..
alpar@2	417	\caption{\hspace{0.5cm}$A\ high\ level\ description\ of\ VF2$}\label{alg:VF2Pseu}
alpar@2	418	\begin{algorithmic}[1]
alpar@2	419
Madarasi@19	420	\Procedure{VF2}{Mapping $\mathfrak{m}$, ProblemType $PT$}
Madarasi@19	421	\If{$\mathfrak{m}$ covers
Madarasi@19	422	$V_{1}$} \State Output($\mathfrak{m}$)
Madarasi@19	423	\Else
Madarasi@28	424	\State Compute the set $P_\mathfrak{m}$ of the candidate pairs for extending $\mathfrak{m}$ \ForAll{$p\in{P_\mathfrak{m}}$} \If{Cons$_{PT}$($p,\mathfrak{m}$) $\wedge$
Madarasi@19	425	$\neg$Cut$_{PT}$($p,\mathfrak{m}$)}
Madarasi@19	426	\State \textbf{call}
Madarasi@19	427	VF2($extend(\mathfrak{m},p)$, $PT$) \EndIf \EndFor \EndIf \EndProcedure
alpar@2	428	\end{algorithmic}
alpar@2	429	\end{algorithm}
alpar@2	430
alpar@2	431
Madarasi@19	432	For the current mapping $\mathfrak{m}$, the algorithm computes $P_\mathfrak{m}$, the set of
Madarasi@28	433	candidate node pairs for extending the current mapping $\mathfrak{m}$.
alpar@2	434
Madarasi@19	435	For each pair $p$ in $P_\mathfrak{m}$, $Cons_{PT}(p,\mathfrak{m})$ and
Madarasi@19	436	$Cut_{PT}(p,\mathfrak{m})$ are evaluated. If the former is true and
Madarasi@19	437	the latter is false, the whole process is recursively applied to
Madarasi@19	438	$extend(\mathfrak{m},p)$. Otherwise, $extend(\mathfrak{m},p)$ is not consistent by $PT$, or it
Madarasi@19	439	can be proved that $\mathfrak{m}$ can not be extended to a whole mapping.
alpar@2	440
alpar@29	441	The correctness of the procedure follows from the claim below.
alpar@29	442
Madarasi@28	443	\begin{claim}\label{claim:consMapps}
alpar@3	444	Through consistent mappings, only consistent whole mappings can be
Madarasi@19	445	reached, and all the consistent whole mappings are reachable through
alpar@3	446	consistent mappings.
alpar@2	447	\end{claim}
alpar@2	448
Madarasi@19	449	Note that a mapping may be reached in exponentially many different ways, since the
Madarasi@19	450	order of extensions does not influence the nascent mapping.
alpar@2	451
Madarasi@28	452	However, one may make the following observations.
Madarasi@28	453
Madarasi@28	454	%\begin{claim}
Madarasi@28	455	%\label{claim:claimTotOrd}
Madarasi@28	456	%Let $\prec$ be an arbitrary total ordering relation on $V_{1}$. If
Madarasi@28	457	%the algorithm ignores each $p=(u,v) \in P_\mathfrak{m}$, for which
Madarasi@28	458	%\begin{center}
Madarasi@28	459	%$\exists (\tilde{u},\tilde{v})\in P_\mathfrak{m}: \tilde{u} \prec u$,
Madarasi@28	460	%\end{center}
Madarasi@28	461	%then no mapping can be reached more than once, and each whole mapping %remains reachable.
Madarasi@28	462	%\end{claim}
Madarasi@28	463
Madarasi@28	464	\begin{definition}
Madarasi@28	465	A total order $(u_{\sigma(1)},u_{\sigma(2)},..,u_{\sigma(\|V_{1}\|)})$ of
Madarasi@28	466	$V_{1}$ is \textbf{matching order} if VF2 can cover $u_{\sigma(d)}$ on the $d$-th level for all $d\in\{1,..,\|V_{1}\|\}$.
Madarasi@28	467	\end{definition}
alpar@2	468
alpar@2	469	\begin{claim}
alpar@2	470	\label{claim:claimTotOrd}
Madarasi@28	471	If VF2 is prescribed to cover the nodes of $G_1$ according to a matching order, then no mapping can be reached more than once and each whole mapping remains reachable.
alpar@2	472	\end{claim}
alpar@2	473
Madarasi@28	474	Note that the cornerstone of the improvements to VF2 is to choose a proper
Madarasi@28	475	matching order.
alpar@2	476
Madarasi@19	477	\subsection{The candidate set}
alpar@2	478	\label{candidateComputingVF2}
Madarasi@19	479	Let $P_\mathfrak{m}$ be the set of the candidate pairs for inclusion in $\mathfrak{m}$.
alpar@2	480
alpar@2	481	\begin{notation}
Madarasi@19	482	Let $\mathbf{T_{1}(\mathfrak{m})}:=\{u \in V_{1}\backslash\mathfrak{D}(\mathfrak{m}) : \exists \tilde{u}\in{\mathfrak{D}(\mathfrak{m}): (u,\tilde{u})\in E_{1}}\}$, and
Madarasi@19	483	$\mathbf{T_{2}(\mathfrak{m})} := \{v \in V_{2}\backslash\mathfrak{R}(\mathfrak{m}) : \exists\tilde{v}\in{\mathfrak{R}(\mathfrak{m}):(v,\tilde{v})\in E_{2}}\}$.
alpar@2	484	\end{notation}
alpar@2	485
Madarasi@28	486	The set $P_\mathfrak{m}$ contains the pairs of uncovered neighbours of covered
Madarasi@17	487	nodes, and if there is not such a node pair, all the pairs containing
alpar@3	488	two uncovered nodes are added. Formally, let
alpar@2	489	\[
Madarasi@19	490	P_\mathfrak{m}\!=\!
alpar@2	491	\begin{cases}
Madarasi@19	492	T_{1}(\mathfrak{m})\times T_{2}(\mathfrak{m})&\hspace{-0.15cm}\text{if }
Madarasi@19	493	T_{1}(\mathfrak{m})\!\neq\!\emptyset\ \text{and }T_{2}(\mathfrak{m})\!\neq
Madarasi@19	494	\emptyset,\\ (V_{1}\!\setminus\!\mathfrak{D}(\mathfrak{m}))\!\times\!(V_{2}\!\setminus\!\mathfrak{R}(\mathfrak{m}))
Madarasi@19	495	&\hspace{-0.15cm}\text{otherwise}.
alpar@2	496	\end{cases}
alpar@2	497	\]
alpar@2	498
alpar@2	499	\subsection{Consistency}
Madarasi@28	500	Let $p=(u,v)\in V_{1}\times V_{2}$, and suppose $\mathfrak{m}$ is a consistent mapping by
Madarasi@19	501	$PT$. $Cons_{PT}(p,\mathfrak{m})$ checks whether
Madarasi@28	502	adding pair $p$ into $\mathfrak{m}$ leads to a consistent mapping by $PT$.
Madarasi@15	503
Madarasi@28	504	For example, the consistency function of the induced subgraph isomorphism problem is the following.
alpar@2	505	\begin{notation}
Madarasi@19	506	Let $\mathbf{\Gamma_{1} (u)}:=\{\tilde{u}\in V_{1} :
Madarasi@19	507	(u,\tilde{u})\in E_{1}\}$, and $\mathbf{\Gamma_{2}
Madarasi@28	508	(v)}:=\{\tilde{v}\in V_{2} : (v,\tilde{v})\in E_{2}\}$, where $u\in V_{1}$ and $v\in V_{2}$. That is, $\mathbf{\Gamma_{i} (w)}$ denotes the set of neighbours of node $w$ in $G_i$ $(i=1,2)$.
alpar@2	509	\end{notation}
alpar@2	510
alpar@2	511	\begin{claim}
Madarasi@28	512	$extend(\mathfrak{m},(u,v))$ is a consistent mapping by $IND$ if and only if $\mathfrak{m}$ is consistent and $(\forall \tilde{u}\in \mathfrak{D}(\mathfrak{m}): (u,\tilde{u})\in E_{1}
Madarasi@28	513	\Leftrightarrow (v,\mathfrak{m}(\tilde{u}))\in E_{2})$.
Madarasi@28	514	\end{claim}
Madarasi@28	515
Madarasi@28	516	The following formulation gives an efficient way of calculating $Cons_{IND}$.
Madarasi@28	517
Madarasi@28	518	\begin{claim}
Madarasi@28	519	$Cons_{IND}((u,v),\mathfrak{m}):=Cons_{IND}(\mathfrak{m})\wedge\mathcal{L}(u)\!\!=\!\!\mathcal{L}(v)\wedge(\forall \tilde{v}\in \Gamma_{2}(v)\cap\mathfrak{R}(\mathfrak{m}):(u,\mathfrak{m}^{-1}(\tilde{v}))\in E_{1})\wedge
Madarasi@19	520	(\forall \tilde{u}\in \Gamma_{1}(u)
Madarasi@28	521	\cap \mathfrak{D}(\mathfrak{m}):(v,\mathfrak{m}(\tilde{u}))\in E_{2})$ is the consistency function for $IND$.
alpar@2	522	\end{claim}
alpar@2	523
alpar@2	524	\subsection{Cutting rules}
Madarasi@19	525	$Cut_{PT}(p,\mathfrak{m})$ is defined by a collection of efficiently
Madarasi@19	526	verifiable conditions. The requirement is that $Cut_{PT}(p,\mathfrak{m})$ can
Madarasi@19	527	be true only if it is impossible to extend $extend(\mathfrak{m},p)$ to a
alpar@3	528	whole mapping.
Madarasi@15	529
Madarasi@28	530	As an example, a cutting function of induced subgraph isomorphism problem is presented.
alpar@2	531	\begin{notation}
Madarasi@19	532	Let $\mathbf{\tilde{T}_{1}}(\mathfrak{m}):=(V_{1}\backslash
Madarasi@19	533	\mathfrak{D}(\mathfrak{m}))\backslash T_{1}(\mathfrak{m})$, and
Madarasi@19	534	\\ $\mathbf{\tilde{T}_{2}}(\mathfrak{m}):=(V_{2}\backslash
Madarasi@19	535	\mathfrak{R}(\mathfrak{m}))\backslash T_{2}(\mathfrak{m})$.
alpar@2	536	\end{notation}
Madarasi@15	537
alpar@2	538	\begin{claim}
Madarasi@19	539	$Cut_{IND}((u,v),\mathfrak{m}):= \|\Gamma_{2} (v)\ \cap\ T_{2}(\mathfrak{m})\| <
Madarasi@19	540	\|\Gamma_{1} (u)\ \cap\ T_{1}(\mathfrak{m})\| \vee \|\Gamma_{2}(v)\cap
Madarasi@19	541	\tilde{T}_{2}(\mathfrak{m})\| < \|\Gamma_{1}(u)\cap
Madarasi@28	542	\tilde{T}_{1}(\mathfrak{m})\|$ is a cutting function for $IND$.
alpar@2	543	\end{claim}
alpar@2	544
Madarasi@22	545	\section{The VF2++ Algorithm}\label{sec:VF2ppAlg}
Madarasi@28	546	Although any matching order makes the search space of VF2 a
alpar@3	547	tree, its choice turns out to dramatically influence the number of
alpar@3	548	visited states. The goal is to determine an efficient one as quickly
alpar@3	549	as possible.
alpar@2	550
Madarasi@28	551	The main reason for the superiority of VF2++ over VF2 is twofold. Firstly,
alpar@29	552	taking into account the structure and the node labeling of the graph,
Madarasi@28	553	VF2++ determines a matching order in which most of the unfruitful
alpar@3	554	branches of the search space can be pruned immediately. Secondly,
alpar@3	555	introducing more efficient --- nevertheless still easier to compute
alpar@3	556	--- cutting rules reduces the chance of going astray even further.
alpar@2	557
Madarasi@28	558	In addition to the usual subgraph isomorphism problem, specialized versions
alpar@29	559	for induced subgraph and graph isomorphism problems have also been
Madarasi@22	560	designed.
alpar@2	561
Madarasi@28	562	Note that a weaker version of the cutting rules of VF2++ and an efficient
Madarasi@28	563	candidate set calculation method were described in~\cite{VF2Plus}.
alpar@2	564
alpar@3	565	It should be noted that all the methods described in this section are
Madarasi@22	566	extendable to handle directed graphs and edge labels as well.
alpar@3	567	The basic ideas and the detailed description of VF2++ are provided in
Madarasi@22	568	the following.\newline
alpar@2	569
Madarasi@19	570	The goal is to find a matching order in which the algorithm is able to
Madarasi@19	571	recognize inconsistency or prune the infeasible branches on the
Madarasi@19	572	highest levels and goes deep only if it is needed.
Madarasi@19	573
Madarasi@19	574	\begin{notation}
Madarasi@28	575	Let $\mathbf{Conn_{H}(u)}:=\|\Gamma_{1}(u)\cap H\|$, that is the
Madarasi@19	576	number of neighbours of u which are in H, where $u\in V_{1} $ and
Madarasi@19	577	$H\subseteq V_{1}$.
Madarasi@19	578	\end{notation}
Madarasi@19	579
Madarasi@19	580	The principal question is the following. Suppose a mapping $\mathfrak{m}$ is
Madarasi@19	581	given. For which node of $T_{1}(\mathfrak{m})$ is the hardest to find a
alpar@29	582	consistent pair in $G_{2}$? The more covered neighbors a node in
Madarasi@19	583	$T_{1}(\mathfrak{m})$ has --- i.e. the largest $Conn_{\mathfrak{D}(\mathfrak{m})}$ it has
alpar@29	584	---, the more rare-to-satisfy consistency constraints for its pair
Madarasi@19	585	are given.
Madarasi@19	586
Madarasi@28	587	Most of the graphs of biological and chemical structures are sparse, thus several nodes in
Madarasi@19	588	$T_{1}(\mathfrak{m})$ may have the same $Conn_{\mathfrak{D}(\mathfrak{m})}$, which makes
Madarasi@19	589	reasonable to define a secondary and a tertiary order between them.
Madarasi@19	590	The observation above proves itself to be as determining, that the
alpar@29	591	secondary ordering prefers nodes with the most uncovered neighbors
Madarasi@19	592	among which have the same $Conn_{\mathfrak{D}(\mathfrak{m})}$ to increase
Madarasi@28	593	$Conn_{\mathfrak{D}(\mathfrak{m})}$ of uncovered nodes as much, as possible. The tertiary ordering prefers nodes having the rarest uncovered labels in $G_2$.
Madarasi@19	594
Madarasi@28	595	Note that the secondary ordering is the same as ordering by degrees,
Madarasi@19	596	which is a static data in front of the above used.
Madarasi@19	597
Madarasi@19	598	These rules can easily result in a matching order which contains the
Madarasi@19	599	nodes of a long path successively, whose nodes may have low $Conn$ and
alpar@29	600	is easily to match into $G_{2}$. To try to avoid that, a
alpar@29	601	Breadth-First-Search order is used, and on each of its levels, the
alpar@29	602	ordering procedure described above is applied.
Madarasi@19	603	\newline
Madarasi@19	604
alpar@29	605	In the following, examples are shown, demonstrating that VF2 may be
alpar@29	606	slow are, even though a matching can be found easily by using a proper
Madarasi@19	607	matching order.
Madarasi@19	608
Madarasi@19	609	\begin{example}
Madarasi@19	610	Suppose $G_{1}$ can be mapped into $G_{2}$ in many ways
Madarasi@19	611	without node labels. Let $u\in V_{1}$ and $v\in V_{2}$.
Madarasi@19	612	\newline
Madarasi@19	613	$\mathcal{L}(u):=black$
Madarasi@19	614	\newline
Madarasi@19	615	$\mathcal{L}(v):=black$
Madarasi@19	616	\newline
Madarasi@22	617	$\mathcal{L}(\tilde{u}):=red \ \forall \tilde{u}\in V_{1}\backslash
Madarasi@22	618	\{u\}$
Madarasi@19	619	\newline
Madarasi@22	620	$\mathcal{L}(\tilde{v}):=red \ \forall \tilde{v}\in V_{2}\backslash
Madarasi@22	621	\{v\}$
Madarasi@19	622
Madarasi@19	623	Now, any mapping by $\mathcal{L}$ must contain $(u,v)$, since
Madarasi@19	624	$u$ is black and no node in $V_{2}$ has a black label except
Madarasi@19	625	$v$. If unfortunately $u$ were the last node which will get covered,
Madarasi@19	626	VF2 would check only in the last steps, whether $u$ can be matched to
Madarasi@19	627	$v$.
Madarasi@28	628
alpar@29	629	However, had $u$ been the first matched node, $u$ would have been
alpar@29	630	matched immediately to $v$, so all the mappings would have been
Madarasi@19	631	precluded in which node labels can not correspond.
Madarasi@19	632	\end{example}
Madarasi@19	633
Madarasi@19	634	\begin{example}
Madarasi@28	635	Suppose there is no node label given, and $G_{1}$ is a small graph that can not be mapped into $G_{2}$ and $u\in V_{1}$.
Madarasi@19	636	\newline
Madarasi@19	637	Let $G'_{1}:=(V_{1}\cup
Madarasi@19	638	\{u'_{1},u'_{2},..,u'_{k}\},E_{1}\cup
Madarasi@19	639	\{(u,u'_{1}),(u'_{1},u'_{2}),..,(u'_{k-1},u'_{k})\})$, that is,
Madarasi@19	640	$G'_{1}$ is $G_{1}\cup \{ a\ k$ long path, which is disjoint
Madarasi@19	641	from $G_{1}$ and one of its starting points is connected to $u\in
Madarasi@19	642	V_{1}\}$.
Madarasi@28	643
alpar@29	644	If, unfortunately, the nodes of the path were the first $k$ nodes in the
alpar@29	645	matching order, the algorithm would iterate through all the possible $k$
Madarasi@19	646	long paths in $G_{2}$, and it would recognize that no path can be
Madarasi@19	647	extended to $G'_{1}$.
Madarasi@19	648	\newline
Madarasi@19	649	However, had it started by the matching of $G_{1}$, it would not
Madarasi@19	650	have matched any nodes of the path.
Madarasi@19	651	\end{example}
Madarasi@19	652
Madarasi@19	653	These examples may look artificial, but the same problems also appear
Madarasi@19	654	in real-world instances, even though in a less obvious way.
Madarasi@19	655
Madarasi@28	656	%\subsection{Preparations}
Madarasi@28	657	%\begin{claim}
Madarasi@28	658	%\label{claim:claimCoverFromLeft}
Madarasi@28	659	%The total ordering relation uniquely determines a node order, in which
Madarasi@28	660	%the nodes of $V_{1}$ will be covered by VF2. From the point of
Madarasi@28	661	%view of the matching procedure, this means, that always the same node
Madarasi@28	662	%of $G_{1}$ will be covered on the $d$-th level.
Madarasi@28	663	%\end{claim}
alpar@2	664
Madarasi@28	665	%\begin{definition}
Madarasi@28	666	%An order $(u_{\sigma(1)},u_{\sigma(2)},..,u_{\sigma(\|V_{1}\|)})$ of
Madarasi@28	667	%$V_{1}$ is \textbf{matching order} if there exists $\prec$ total
Madarasi@28	668	%ordering relation, s.t. the VF2 with $\prec$ on the d-th level finds
Madarasi@28	669	%pair for $u_{\sigma(d)}$ for all $d\in\{1,..,\|V_{1}\|\}$.
Madarasi@28	670	%\end{definition}
alpar@2	671
Madarasi@28	672	%\begin{claim}\label{claim:MOclaim}
Madarasi@28	673	%A total ordering is matching order iff the nodes of every component
Madarasi@28	674	%form an interval in the node sequence, and every node connects to a
Madarasi@28	675	%previous node in its component except the first node of each component.
Madarasi@28	676	%\end{claim}
alpar@2	677
Madarasi@28	678	%In summary, a total ordering always uniquely determines a matching
Madarasi@28	679	%order, and every matching order can be determined by a total ordering,
Madarasi@28	680	%however, more than one different total orderings may determine the
Madarasi@28	681	%same matching order.
alpar@2	682
Madarasi@28	683	\subsection{Matching order}
alpar@2	684	\begin{notation}
Madarasi@19	685	Let \textbf{F$_\mathcal{M}$(l)}$:=\|\{v\in V_{2} :
Madarasi@28	686	l=\mathcal{L}(v)\}\|-\|\{u\in \mathcal{M} : l=\mathcal{L}(u)\}\|$,
Madarasi@19	687	where $l$ is a label and $\mathcal{M}\subseteq V_{1}$.
alpar@2	688	\end{notation}
alpar@2	689
Madarasi@28	690	\begin{definition}Let $\mathbf{arg\ max}_{f}(S) :=\{u\in S : f(u)=max_{v\in S}\{f(v)\}\}$ and $\mathbf{arg\ min}_{f}(S) := arg\ max_{(-f)}(S)$, where $S$ is a finite set and $f:S\longrightarrow \mathbb{R}$.
alpar@2	691	\end{definition}
alpar@2	692
Madarasi@28	693	\begin{notation}
Madarasi@28	694	Let $deg(v)$ denote the degree of node $v$.
Madarasi@28	695	\end{notation}
Madarasi@28	696
Madarasi@28	697	\begin{algorithm}[H]
Madarasi@8	698	\algtext*{EndIf}
Madarasi@8	699	\algtext*{EndProcedure}
alpar@2	700	\algtext*{EndWhile}
Madarasi@13	701	\algtext*{EndFor}
alpar@2	702	\caption{\hspace{0.5cm}$The\ method\ of\ VF2++\ for\ determining\ the\ node\ order$}\label{alg:VF2PPPseu}
alpar@2	703	\begin{algorithmic}[1]
alpar@3	704	\Procedure{VF2++order}{} \State $\mathcal{M}$ := $\emptyset$
Madarasi@19	705	\Comment{matching order} \While{$V_{1}\backslash \mathcal{M}
alpar@3	706	\neq\emptyset$} \State $r\in$ arg max$_{deg}$ (arg
Madarasi@19	707	min$_{F_\mathcal{M}\circ \mathcal{L}}(V_{1}\backslash
alpar@3	708	\mathcal{M})$)\label{alg:findMin} \State Compute $T$, a BFS tree with
alpar@3	709	root node $r$. \For{$d=0,1,...,depth(T)$} \State $V_d$:=nodes of the
Madarasi@28	710	$d$-th level \State Process $V_d$ \Comment{See Algorithm~\ref{alg:VF2PPProcess1}} \EndFor
alpar@3	711	\EndWhile \EndProcedure
alpar@2	712	\end{algorithmic}
alpar@2	713	\end{algorithm}
alpar@2	714
alpar@2	715	\begin{algorithm}
Madarasi@8	716	\algtext*{EndIf}
Madarasi@8	717	\algtext*{EndProcedure}%ne nyomtasson ..
alpar@2	718	\algtext*{EndWhile}
Madarasi@8	719	\caption{\hspace{.5cm}$The\ method\ for\ processing\ a\ level\ of\ the\ BFS\ tree$}\label{alg:VF2PPProcess1}
alpar@2	720	\begin{algorithmic}[1]
Madarasi@17	721	\Procedure{VF2++ProcessLevel}{$V_{d}$} \While{$V_d\neq\emptyset$}
Madarasi@28	722	\State $m\in$ arg min$_{F_\mathcal{M}\circ\ \mathcal{L}}($ arg max$_{deg}($arg
alpar@3	723	max$_{Conn_{\mathcal{M}}}(V_{d})))$ \State $V_d:=V_d\backslash m$
alpar@3	724	\State Append node $m$ to the end of $\mathcal{M}$ \State Refresh
alpar@3	725	$F_\mathcal{M}$ \EndWhile \EndProcedure
alpar@2	726	\end{algorithmic}
alpar@2	727	\end{algorithm}
alpar@2	728
Madarasi@28	729	Algorithm~\ref{alg:VF2PPPseu} is a high-level description of the
alpar@4	730	matching order procedure of VF2++. It computes a BFS tree for each
Madarasi@19	731	component in ascending order of their rarest node labels and largest $deg$,
Madarasi@28	732	whose root vertex is the minimal node of its component. Algorithm~\ref{alg:VF2PPProcess1} is a method to process a level of the BFS tree, which appends the nodes of the current level in descending
Madarasi@8	733	lexicographic order by $(Conn_{\mathcal{M}},deg,-F_\mathcal{M})$ separately
Madarasi@8	734	to $\mathcal{M}$, and refreshes $F_\mathcal{M}$ immediately.
alpar@2	735
Madarasi@28	736	\begin{claim}
Madarasi@28	737	Algorithm~\ref{alg:VF2PPPseu} provides a matching order.
Madarasi@28	738	\end{claim}
alpar@2	739
alpar@2	740
alpar@2	741	\subsection{Cutting rules}
alpar@2	742	\label{VF2PPCuttingRules}
Madarasi@19	743	This section presents the cutting rules of VF2++, which are improved by using extra information coming from the node labels.
alpar@2	744	\begin{notation}
Madarasi@19	745	Let $\mathbf{\Gamma_{1}^{l}(u)}:=\{\tilde{u} : \mathcal{L}(\tilde{u})=l
Madarasi@19	746	\wedge \tilde{u}\in \Gamma_{1} (u)\}$ and
Madarasi@19	747	$\mathbf{\Gamma_{2}^{l}(v)}:=\{\tilde{v} : \mathcal{L}(\tilde{v})=l \wedge
Madarasi@19	748	\tilde{v}\in \Gamma_{2} (v)\}$, where $u\in V_{1}$, $v\in
Madarasi@19	749	V_{2}$ and $l$ is a label.
alpar@2	750	\end{notation}
alpar@2	751
Madarasi@28	752	\begin{claim}[Cutting function for ISO]
Madarasi@28	753	\[LabCut_{ISO}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}\|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})\|\!\neq\!\|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})\|\ \vee\]\[\bigvee_{l\ is\ label} \newline \|\Gamma_{2}^{l}(v)\cap \tilde{T}_{2}(\mathfrak{m})\| \neq \|\Gamma_{1}^{l}(u)\cap \tilde{T}_{1}(\mathfrak{m})\|\] is a cutting function for ISO.
alpar@2	754	\end{claim}
Madarasi@13	755
Madarasi@28	756	\begin{claim}[Cutting function for IND]
Madarasi@28	757	\[LabCut_{IND}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}\|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})\|\!<\!\|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})\|\ \vee\]\[\bigvee_{l\ is\ label} \newline \|\Gamma_{2}^{l}(v)\cap \tilde{T}_{2}(\mathfrak{m})\| < \|\Gamma_{1}^{l}(u)\cap \tilde{T}_{1}(\mathfrak{m})\|\] is a cutting function for IND.
Madarasi@28	758	\end{claim}
Madarasi@28	759
Madarasi@28	760	\begin{claim}[Cutting function for SUB]
Madarasi@28	761	\[LabCut_{SU\!B}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}\|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})\|\!<\!\|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})\|\] is a cutting function for SUB.
Madarasi@19	762	\end{claim}
alpar@2	763
Madarasi@19	764
Madarasi@19	765
Madarasi@22	766	\section{Implementation details}\label{sec:VF2ppImpl}
alpar@3	767	This section provides a detailed summary of an efficient
alpar@3	768	implementation of VF2++.
Madarasi@28	769	\begin{notation}
Madarasi@28	770	Let $\Delta_1$ and $\Delta_2$ denote the largest degree in $G_1$ and $G_2$, respectively, and let $\Delta=\max\{\Delta_1,\Delta_2\}$.
Madarasi@28	771	\end{notation}
Madarasi@22	772	\subsection{Storing a mapping}
alpar@3	773	After fixing an arbitrary node order ($u_0, u_1, ..,
Madarasi@28	774	u_{\|V_{1}\|-1}$) of $G_{1}$, an array $M$ can be used to store
alpar@3	775	the current mapping in the following way.
alpar@2	776	\[
alpar@3	777	M[i] =
alpar@2	778	\begin{cases}
Madarasi@19	779	v & if\ (u_i,v)\ is\ in\ the\ mapping\\ INV\!ALI\!D &
Madarasi@17	780	if\ no\ node\ has\ been\ mapped\ to\ u_i,
alpar@2	781	\end{cases}
alpar@2	782	\]
Madarasi@28	783	where $i\in\{0,1, ..,\|V_{1}\|-1\}$, $v\in V_{2}$ and $INV\!ALI\!D$
alpar@3	784	means "no node".
Madarasi@22	785	\subsection{Avoiding the recurrence}
alpar@4	786	The recursion of Algorithm~\ref{alg:VF2Pseu} can be realized
Madarasi@9	787	as a \textit{while loop}, which has a loop counter $depth$ denoting the
Madarasi@28	788	current depth of the recursion. Fixing a matching order, let $M$
Madarasi@28	789	denote the array storing the current mapping. Observe that
Madarasi@19	790	$M$ is $INV\!ALI\!D$ from index $depth$+1 and not $INV\!ALI\!D$ before
Madarasi@9	791	$depth$. $M[depth]$ changes
alpar@3	792	while the state is being processed, but the property is held before
alpar@3	793	both stepping back to a predecessor state and exploring a successor
alpar@3	794	state.
alpar@2	795
alpar@29	796	The necessary part of the candidate set is easy to maintain or
alpar@29	797	compute by following
Madarasi@28	798	the steps described in Section~\ref{candidateComputingVF2}. A much faster method
Madarasi@28	799	has been designed for biological and sparse graphs, see the next
alpar@3	800	section for details.
alpar@2	801
Madarasi@22	802	\subsection{Calculating the candidates for a node}
Madarasi@28	803	The task is not to maintain the candidate set, but to generate the
Madarasi@19	804	candidate nodes in $G_{2}$ for a given node $u\in V_{1}$. In
Madarasi@20	805	case of any of the three problem types and a mapping $\mathfrak{m}$, if a node $v\in
Madarasi@19	806	V_{2}$ is a potential pair of $u\in V_{1}$, then $\forall
Madarasi@20	807	u'\in \mathfrak{D}(\mathfrak{m}) : (u,u')\in
Madarasi@20	808	E_{1}\Rightarrow (v,\mathfrak{m}(u'))\in
alpar@29	809	E_{2}$. That is, each covered neighbor of $u$ has to be mapped to
alpar@29	810	a covered neighbor of $v$, i.e. selecting arbitrarily a covered neighbor $u'$ of $u$, all of the admissible candidates for $u$ are among the neighbors of $\mathfrak{m}(u')$.
alpar@2	811
Madarasi@28	812	Having said that, an algorithm running in $\Theta(\Delta_2)$ time is
alpar@3	813	describable if there exists a covered node in the component containing
Madarasi@17	814	$u$, and a linear one otherwise.
alpar@2	815
alpar@2	816
Madarasi@22	817	\subsection{Determining the node order}
alpar@3	818	This section describes how the node order preprocessing method of
alpar@3	819	VF2++ can efficiently be implemented.
alpar@2	820
alpar@3	821	For using lookup tables, the node labels are associated with the
alpar@3	822	numbers $\{0,1,..,\|K\|-1\}$, where $K$ is the set of the labels. It
Madarasi@9	823	enables $F_\mathcal{M}$ to be stored in an array. At first, the node order
alpar@3	824	$\mathcal{M}=\emptyset$, so $F_\mathcal{M}[i]$ is the number of nodes
Madarasi@28	825	in $V_{2}$ having label $i$, which is easy to compute in
Madarasi@28	826	$\Theta(\|V_{2}\|)$ steps.
alpar@2	827
Madarasi@19	828	Representing $\mathcal{M}\subseteq V_{1}$ as an array of
Madarasi@28	829	size $\|V_{1}\|$, both the computation of the BFS tree, and processing its levels by Algorithm~\ref{alg:VF2PPProcess1} can be done in-place by swapping nodes.
alpar@2	830
Madarasi@22	831	\subsection{Cutting rules}
alpar@29	832	Section~\ref{VF2PPCuttingRules} described the cutting rules
alpar@29	833	using the sets $T_{1}$, $T_{2}$, $\tilde T_{1}$
alpar@29	834	and $\tilde T_{2}$, which are dependent on the current mapping. The aim is to check the labeled cutting
Madarasi@28	835	rules of VF2++ in $\Theta(\Delta)$ time.
alpar@2	836
alpar@29	837	Firstly, suppose that these four sets are given a way, that
alpar@3	838	checking whether a node is in a certain set takes constant time,
alpar@3	839	e.g. they are given by their 0-1 characteristic vectors. Let $L$ be an
alpar@3	840	initially zero integer lookup table of size $\|K\|$. After incrementing
Madarasi@19	841	$L[\mathcal{L}(u')]$ for all $u'\in \Gamma_{1}(u) \cap T_{1}(\mathfrak{m})$ and
Madarasi@19	842	decrementing $L[\mathcal{L}(v')]$ for all $v'\in\Gamma_{2} (v) \cap
alpar@29	843	T_{2}(\mathfrak{m})$, the first part of the cutting rules can be checked in
Madarasi@28	844	$\Theta(\Delta)$ time by considering the proper signs of $L$. Setting $L$
Madarasi@28	845	to zero takes $\Theta(\Delta)$ time again, which makes it possible to use
Madarasi@9	846	the same table through the whole algorithm. The second part of the
alpar@3	847	cutting rules can be verified using the same method with $\tilde
Madarasi@19	848	T_{1}$ and $\tilde T_{2}$ instead of $T_{1}$ and
Madarasi@28	849	$T_{2}$. Thus, the overall time complexity is $\Theta(\Delta)$.
alpar@2	850
Madarasi@28	851	To maintain the sets $T_{1}$, $T_{2}$, $\tilde T_{1}$
Madarasi@28	852	and $\tilde T_{2}$, two other integer lookup tables storing the number of covered neighbours of the nodes of the two graphs can be used. This representation allows constant-time membership checking, furthermore it is maintainable in $\Theta(\Delta)$ time whenever a node pair is added or subtracted by incrementing
alpar@3	853	or decrementing the proper indices. A further improvement is that the
Madarasi@19	854	values of $L[\mathcal{L}(u')]$ in case of checking $u$ are dependent only on
Madarasi@28	855	$u$, i.e. on the current depth of the recursion, so for each $u\in V_{1}$, an array of pairs \textit{(label, number of such labels)} can store $L$. Note that these arrays are at most of size
Madarasi@28	856	$\Delta_1$ if pairs with non-appearing node labels are discarded.
alpar@2	857
Madarasi@19	858	Using similar techniques, the consistency function can be evaluated in
Madarasi@28	859	$\Theta(\Delta)$ steps, as well.
alpar@2	860
Madarasi@22	861	\section{Experimental results}\label{sec:ExpRes}
Madarasi@28	862	This section compares the performance of VF2++ and VF2~Plus. According to
Madarasi@19	863	our experience, both algorithms run faster than VF2 with orders of
Madarasi@19	864	magnitude, thus its inclusion was not reasonable.
alpar@2	865
alpar@29	866	The algorithms were implemented in C++ using the open-source LEMON
alpar@29	867	graph and network optimization
alpar@29	868	library~\cite{LEMON}\cite{o11:_lemon_open_sourc_c_graph_templ_librar}. The
alpar@29	869	tests were carried out on a Linux-based system with an Intel i7 X980
alpar@29	870	3.33 GHz CPU and 6 GB of RAM.
alpar@29	871
alpar@2	872	\subsection{Biological graphs}
alpar@3	873	The tests have been executed on a recent biological dataset created
alpar@3	874	for the International Contest on Pattern Search in Biological
Madarasi@28	875	Databases~\cite{Content}, which has been constructed of molecule,
Madarasi@7	876	protein and contact map graphs extracted from the Protein Data
Madarasi@28	877	Bank~\cite{ProteinDataBank}.
alpar@2	878
alpar@3	879	The molecule dataset contains small graphs with less than 100 nodes
alpar@3	880	and an average degree of less than 3. The protein dataset contains
alpar@3	881	graphs having 500-10 000 nodes and an average degree of 4, while the
alpar@3	882	contact map dataset contains graphs with 150-800 nodes and an average
alpar@3	883	degree of 20. \\
alpar@2	884
Madarasi@28	885	In the following, both the induced subgraph and the graph
Madarasi@28	886	isomorphism problems will be examined.
alpar@29	887	This dataset provides graph pairs, between which all the induced subgraph isomorphisms have to be found. For the running times, please see Figure~\ref{fig:bioIND}.
Madarasi@7	888
Madarasi@7	889	In an other experiment, the nodes of each graph in the database had been
Madarasi@7	890	shuffled, and an isomorphism between the shuffled and the original
Madarasi@28	891	graph was searched. The running times are shown on Figure~\ref{fig:bioISO}.
Madarasi@7	892
Madarasi@7	893
Madarasi@17	894	\begin{figure}[H]
Madarasi@17	895	\vspace*{-2cm}
Madarasi@17	896	\hspace*{-1.5cm}
Madarasi@17	897	\begin{subfigure}[b]{0.55\textwidth}
Madarasi@17	898	\begin{figure}[H]
Madarasi@17	899	\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@28	900	\begin{axis}[title=Molecules IND,xlabel={$\|V_2\|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@17	901	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@17	902	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	903	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	904	format/1000 sep = \kern 0.08em}]
Madarasi@17	905	%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@17	906	\addplot table {Orig/Molecules.32.txt}; \addplot[mark=triangle*,mark
Madarasi@17	907	size=1.8pt,color=red] table {VF2PPLabel/Molecules.32.txt};
Madarasi@17	908	\end{axis}
Madarasi@17	909	\end{tikzpicture}
Madarasi@17	910	\caption{In the case of molecules, the algorithms have
Madarasi@17	911	similar behaviour, but VF2++ is almost two times faster even on such
Madarasi@17	912	small graphs.} \label{fig:INDMolecule}
Madarasi@17	913	\end{figure}
Madarasi@17	914	\end{subfigure}
Madarasi@17	915	\hspace*{1.5cm}
Madarasi@17	916	\begin{subfigure}[b]{0.55\textwidth}
Madarasi@17	917	\begin{figure}[H]
Madarasi@17	918	\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@28	919	\begin{axis}[title=Contact maps IND,xlabel={$\|V_2\|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@17	920	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@17	921	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	922	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	923	format/1000 sep = \kern 0.08em}]
Madarasi@17	924	%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@17	925	\addplot table {Orig/ContactMaps.128.txt};
Madarasi@17	926	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
Madarasi@17	927	{VF2PPLabel/ContactMaps.128.txt};
Madarasi@17	928	\end{axis}
Madarasi@17	929	\end{tikzpicture}
Madarasi@17	930	\caption{On contact maps, VF2++ runs almost in constant time, while VF2
alpar@29	931	Plus has a near-linear behavior.} \label{fig:INDContact}
Madarasi@17	932	\end{figure}
Madarasi@17	933	\end{subfigure}
Madarasi@17	934
Madarasi@17	935	\begin{center}
Madarasi@17	936	\vspace*{-0.5cm}
Madarasi@17	937	\begin{subfigure}[b]{0.55\textwidth}
Madarasi@17	938	\begin{figure}[H]
Madarasi@17	939	\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@28	940	\begin{axis}[title=Proteins IND,xlabel={$\|V_2\|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@17	941	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@17	942	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	943	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	944	format/1000 sep = \kern 0.08em}] %\addplot+[only marks] table
Madarasi@17	945	{proteinsOrig.txt}; \addplot[mark=*,mark size=1.2pt,color=blue]
Madarasi@17	946	table {Orig/Proteins.256.txt}; \addplot[mark=triangle*,mark
Madarasi@17	947	size=1.8pt,color=red] table {VF2PPLabel/Proteins.256.txt};
Madarasi@17	948	\end{axis}
Madarasi@17	949	\end{tikzpicture}
Madarasi@28	950	\caption{Both of the algorithms have linear behaviour on protein
Madarasi@17	951	graphs. VF2++ is more than 10 times faster than VF2
Madarasi@17	952	Plus.} \label{fig:INDProt}
Madarasi@17	953	\end{figure}
Madarasi@17	954	\end{subfigure}
Madarasi@17	955	\end{center}
Madarasi@17	956	\vspace*{-0.5cm}
Madarasi@28	957	\caption{\normalsize{Induced subgraph isomorphism problem on biological graphs}}\label{fig:bioIND}
Madarasi@17	958	\end{figure}
Madarasi@17	959
alpar@2	960
alpar@2	961	\begin{figure}[H]
Madarasi@7	962	\vspace*{-2cm}
Madarasi@7	963	\hspace*{-1.5cm}
Madarasi@7	964	\begin{subfigure}[b]{0.55\textwidth}
Madarasi@7	965	\begin{figure}[H]
Madarasi@7	966	\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@28	967	\begin{axis}[title=Molecules ISO,xlabel={$\|V_2\|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@7	968	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@7	969	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	970	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	971	format/1000 sep = \kern 0.08em}]
Madarasi@7	972	%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@7	973	\addplot table {Orig/moleculesIso.txt}; \addplot[mark=triangle*,mark
Madarasi@7	974	size=1.8pt,color=red] table {VF2PPLabel/moleculesIso.txt};
Madarasi@7	975	\end{axis}
Madarasi@7	976	\end{tikzpicture}
Madarasi@28	977	\caption{The results are close to each other on contact maps, but VF2++ seems to be slightly faster as the number of nodes increases.
Madarasi@28	978	}\label{fig:ISOMolecule}
Madarasi@7	979	\end{figure}
Madarasi@7	980	\end{subfigure}
Madarasi@7	981	\hspace*{1.5cm}
Madarasi@7	982	\begin{subfigure}[b]{0.55\textwidth}
Madarasi@7	983	\begin{figure}[H]
Madarasi@7	984	\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@28	985	\begin{axis}[title=Contact maps ISO,xlabel={$\|V_2\|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@7	986	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@7	987	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	988	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	989	format/1000 sep = \kern 0.08em}]
Madarasi@7	990	%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@7	991	\addplot table {Orig/contactMapsIso.txt}; \addplot[mark=triangle*,mark
Madarasi@7	992	size=1.8pt,color=red] table {VF2PPLabel/contactMapsIso.txt};
Madarasi@7	993	\end{axis}
Madarasi@7	994	\end{tikzpicture}
Madarasi@28	995	\caption{In the case of molecules, there is no significant
Madarasi@28	996	difference, but VF2++ performs consistently better.}\label{fig:ISOContact}
Madarasi@7	997	\end{figure}
Madarasi@7	998	\end{subfigure}
Madarasi@7	999
alpar@2	1000	\begin{center}
Madarasi@7	1001	\vspace*{-0.5cm}
Madarasi@7	1002	\begin{subfigure}[b]{0.55\textwidth}
Madarasi@7	1003	\begin{figure}[H]
Madarasi@7	1004	\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@28	1005	\begin{axis}[title=Proteins ISO,xlabel={$\|V_2\|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@7	1006	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@7	1007	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	1008	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	1009	format/1000 sep = \kern 0.08em}]
Madarasi@7	1010	%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@7	1011	\addplot table {Orig/proteinsIso.txt}; \addplot[mark=triangle*,mark
Madarasi@7	1012	size=1.8pt,color=red] table {VF2PPLabel/proteinsIso.txt};
Madarasi@7	1013	\end{axis}
Madarasi@7	1014	\end{tikzpicture}
Madarasi@28	1015	\caption{On protein graphs, VF2~Plus has a super linear time
Madarasi@7	1016	complexity, while VF2++ runs in near constant time. The difference
Madarasi@28	1017	is about two orders of magnitude on large graphs.}\label{fig:ISOProt}
Madarasi@7	1018	\end{figure}
Madarasi@7	1019	\end{subfigure}
Madarasi@7	1020	\end{center}
Madarasi@7	1021	\vspace*{-0.6cm}
Madarasi@28	1022	\caption{\normalsize{Graph isomorphism problem on biological graphs}}\label{fig:bioISO}
Madarasi@7	1023	\end{figure}
Madarasi@7	1024
Madarasi@7	1025
alpar@2	1026
alpar@2	1027
alpar@2	1028	\subsection{Random graphs}
Madarasi@28	1029	This section compares VF2++ with VF2~Plus on random graphs of large
alpar@3	1030	size. The node labels are uniformly distributed. Let $\delta$ denote
alpar@3	1031	the average degree. For the parameters of problems solved in the
alpar@3	1032	experiments, please see the top of each chart.
Madarasi@28	1033	\subsubsection{Graph isomorphism problem}
alpar@3	1034	To evaluate the efficiency of the algorithms in the case of graph
Madarasi@28	1035	isomorphism problem, random connected graphs of less than 20 000 nodes have been
alpar@3	1036	considered. Generating a random graph and shuffling its nodes, an
alpar@29	1037	isomorphism had to be found. Figure~\ref{fig:randISO} shows the running times
alpar@4	1038	on graph sets of various density.
alpar@2	1039
Madarasi@7	1040
Madarasi@7	1041
Madarasi@7	1042
Madarasi@12	1043	\begin{figure}
Madarasi@7	1044	\vspace*{-1.5cm}
Madarasi@7	1045	\hspace*{-1.5cm}
Madarasi@7	1046	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1047	\begin{center}
alpar@2	1048	\begin{tikzpicture}
Madarasi@28	1049	\begin{axis}[title={Random ISO, $\delta = 5$},width=7.2cm,height=6cm,xlabel={$\|V_2\|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1050	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1051	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	1052	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	1053	format/1000 sep = \kern 0.08em}]
alpar@2	1054	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1055	\addplot table {randGraph/iso/vf2pIso5_1.txt};
alpar@3	1056	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1057	{randGraph/iso/vf2ppIso5_1.txt};
alpar@2	1058	\end{axis}
alpar@2	1059	\end{tikzpicture}
alpar@2	1060	\end{center}
Madarasi@7	1061	\end{subfigure}
Madarasi@7	1062	%\hspace{1cm}
Madarasi@7	1063	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1064	\begin{center}
alpar@2	1065	\begin{tikzpicture}
Madarasi@28	1066	\begin{axis}[title={Random ISO, $\delta = 10$},width=7.2cm,height=6cm,xlabel={$\|V_2\|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1067	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1068	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	1069	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	1070	format/1000 sep = \kern 0.08em}]
alpar@2	1071	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1072	\addplot table {randGraph/iso/vf2pIso10_1.txt};
alpar@3	1073	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1074	{randGraph/iso/vf2ppIso10_1.txt};
alpar@2	1075	\end{axis}
alpar@2	1076	\end{tikzpicture}
alpar@2	1077	\end{center}
Madarasi@7	1078	\end{subfigure}
Madarasi@7	1079	%%\hspace{1cm}
Madarasi@7	1080	\hspace*{-1.5cm}
Madarasi@7	1081	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1082	\begin{center}
alpar@2	1083	\begin{tikzpicture}
Madarasi@28	1084	\begin{axis}[title={Random ISO, $\delta = 15$},width=7.2cm,height=6cm,xlabel={$\|V_2\|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1085	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1086	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	1087	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	1088	format/1000 sep = \kern 0.08em}]
alpar@2	1089	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1090	\addplot table {randGraph/iso/vf2pIso15_1.txt};
alpar@3	1091	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1092	{randGraph/iso/vf2ppIso15_1.txt};
alpar@2	1093	\end{axis}
alpar@2	1094	\end{tikzpicture}
alpar@2	1095	\end{center}
Madarasi@7	1096	\end{subfigure}
Madarasi@7	1097	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1098	\begin{center}
alpar@2	1099	\begin{tikzpicture}
Madarasi@28	1100	\begin{axis}[title={Random ISO, $\delta = 100$},width=7.2cm,height=6cm,xlabel={$\|V_2\|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1101	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1102	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	1103	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	1104	format/1000 sep = \kern 0.08em}]
alpar@2	1105	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1106	\addplot table {randGraph/iso/vf2pIso100_1.txt};
alpar@3	1107	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1108	{randGraph/iso/vf2ppIso100_1.txt};
alpar@2	1109	\end{axis}
alpar@2	1110	\end{tikzpicture}
Madarasi@23	1111	\end{center}
Madarasi@7	1112	\end{subfigure}
alpar@2	1113	\vspace*{-0.8cm}
Madarasi@28	1114	\caption{Graph isomorphism problem on random graphs
Madarasi@23	1115	}\label{fig:randISO}
alpar@2	1116	\end{figure}
alpar@2	1117
alpar@2	1118
Madarasi@28	1119	\subsubsection{Induced subgraph isomorphism problem}
Madarasi@28	1120	This section presents a comparison of VF2++ and VF2~Plus in the case
Madarasi@28	1121	of induced subgraph isomorphism problem. In addition to the size of graph $G_2$, that of $G_1$ dramatically influences the hardness of
Madarasi@28	1122	a given problem too, so the overall picture is provided by examining graphs to be embedded of various size.
alpar@2	1123
Madarasi@17	1124	For each chart, a number $0<\rho< 1$ has been fixed, and the following
Madarasi@19	1125	has been executed 150 times. Generating a large graph $G_{2}$ of an average degree of $\delta$,
Madarasi@28	1126	choose 10 of its induced subgraphs having $\rho\|V_{2}\|$ nodes,
Madarasi@28	1127	and for all the 10 subgraphs find a mapping by using both graph
alpar@3	1128	matching algorithms. The $\delta = 5, 10, 35$ and $\rho = 0.05, 0.1,
Madarasi@23	1129	0.3, 0.8$ cases have been examined, see
Madarasi@28	1130	Figure~\ref{fig:randIND5},~\ref{fig:randIND10}~and~\ref{fig:randIND35}.
alpar@2	1131
alpar@2	1132
alpar@2	1133
alpar@2	1134
alpar@2	1135
Madarasi@12	1136	\begin{figure}
Madarasi@7	1137	\vspace*{-1.5cm}
Madarasi@7	1138	\hspace*{-1.5cm}
alpar@2	1139	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1140	\begin{center}
alpar@2	1141	\begin{tikzpicture}
Madarasi@28	1142	\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={$\|V_2\|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1143	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1144	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	1145	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	1146	format/1000 sep = \kern 0.08em}]
alpar@2	1147	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1148	\addplot table {randGraph/ind/vf2pInd5_0.05.txt};
alpar@3	1149	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1150	{randGraph/ind/vf2ppInd5_0.05.txt};
alpar@2	1151	\end{axis}
alpar@2	1152	\end{tikzpicture}
alpar@2	1153	\end{center}
alpar@2	1154	\end{subfigure}
alpar@2	1155	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1156	\begin{center}
alpar@2	1157	\begin{tikzpicture}
Madarasi@28	1158	\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={$\|V_2\|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1159	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1160	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	1161	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	1162	format/1000 sep = \kern 0.08em}]
alpar@2	1163	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1164	\addplot table {randGraph/ind/vf2pInd5_0.1.txt};
alpar@3	1165	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1166	{randGraph/ind/vf2ppInd5_0.1.txt};
alpar@2	1167	\end{axis}
alpar@2	1168	\end{tikzpicture}
alpar@2	1169	\end{center}
alpar@2	1170	\end{subfigure}
Madarasi@7	1171	\hspace*{-1.5cm}
alpar@2	1172	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1173	\begin{center}
alpar@2	1174	\begin{tikzpicture}
Madarasi@28	1175	\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={$\|V_2\|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1176	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1177	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	1178	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	1179	format/1000 sep = \kern 0.08em}]
alpar@2	1180	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1181	\addplot table {randGraph/ind/vf2pInd5_0.3.txt};
alpar@3	1182	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1183	{randGraph/ind/vf2ppInd5_0.3.txt};
alpar@2	1184	\end{axis}
alpar@2	1185	\end{tikzpicture}
alpar@2	1186	\end{center}
alpar@2	1187	\end{subfigure}
alpar@2	1188	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1189	\begin{center}
alpar@2	1190	\begin{tikzpicture}
Madarasi@28	1191	\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={$\|V_2\|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1192	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1193	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	1194	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	1195	format/1000 sep = \kern 0.08em}]
alpar@2	1196	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1197	\addplot table {randGraph/ind/vf2pInd5_0.8.txt};
alpar@3	1198	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1199	{randGraph/ind/vf2ppInd5_0.8.txt};
alpar@2	1200	\end{axis}
alpar@2	1201	\end{tikzpicture}
Madarasi@23	1202	\end{center}
alpar@2	1203	\end{subfigure}
alpar@2	1204	\vspace*{-0.8cm}
Madarasi@28	1205	\caption{Induced subgraph isomorphism problem on random graphs having an average degree of
Madarasi@28	1206	5}\label{fig:randIND5}
alpar@2	1207	\end{figure}
alpar@2	1208
alpar@2	1209
Madarasi@23	1210	\begin{figure}
Madarasi@7	1211	\vspace*{-1.5cm}
Madarasi@7	1212	\hspace*{-1.5cm}
alpar@2	1213	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1214	\begin{center}
Madarasi@7	1215	\hspace*{-0.5cm}
alpar@2	1216	\begin{tikzpicture}
Madarasi@28	1217	\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={$\|V_2\|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1218	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1219	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	1220	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	1221	format/1000 sep = \kern 0.08em}]
alpar@2	1222	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1223	\addplot table {randGraph/ind/vf2pInd10_0.05.txt};
alpar@3	1224	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1225	{randGraph/ind/vf2ppInd10_0.05.txt};
alpar@2	1226	\end{axis}
alpar@2	1227	\end{tikzpicture}
alpar@2	1228	\end{center}
alpar@2	1229	\end{subfigure}
alpar@2	1230	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1231	\begin{center}
Madarasi@7	1232	\hspace*{-0.5cm}
alpar@2	1233	\begin{tikzpicture}
Madarasi@28	1234	\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={$\|V_2\|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1235	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1236	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	1237	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	1238	format/1000 sep = \kern 0.08em}]
alpar@2	1239	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1240	\addplot table {randGraph/ind/vf2pInd10_0.1.txt};
alpar@3	1241	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1242	{randGraph/ind/vf2ppInd10_0.1.txt};
alpar@2	1243	\end{axis}
alpar@2	1244	\end{tikzpicture}
alpar@2	1245	\end{center}
alpar@2	1246	\end{subfigure}
Madarasi@7	1247	\hspace*{-1.5cm}
alpar@2	1248	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1249	\begin{center}
alpar@2	1250	\begin{tikzpicture}
Madarasi@28	1251	\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={$\|V_2\|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1252	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1253	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	1254	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	1255	format/1000 sep = \kern 0.08em}]
alpar@2	1256	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1257	\addplot table {randGraph/ind/vf2pInd10_0.3.txt};
alpar@3	1258	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1259	{randGraph/ind/vf2ppInd10_0.3.txt};
alpar@2	1260	\end{axis}
alpar@2	1261	\end{tikzpicture}
alpar@2	1262	\end{center}
alpar@2	1263	\end{subfigure}
alpar@2	1264	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1265	\begin{center}
alpar@2	1266	\begin{tikzpicture}
Madarasi@28	1267	\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={$\|V_2\|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1268	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1269	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	1270	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	1271	format/1000 sep = \kern 0.08em}]
alpar@2	1272	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1273	\addplot table {randGraph/ind/vf2pInd10_0.8.txt};
alpar@3	1274	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1275	{randGraph/ind/vf2ppInd10_0.8.txt};
alpar@2	1276	\end{axis}
alpar@2	1277	\end{tikzpicture}
Madarasi@23	1278	\end{center}
alpar@2	1279	\end{subfigure}
alpar@2	1280	\vspace*{-0.8cm}
Madarasi@28	1281	\caption{Induced subgraph isomorphism problem on random graphs having an average degree of
Madarasi@28	1282	10}\label{fig:randIND10}
alpar@2	1283	\end{figure}
alpar@2	1284
alpar@2	1285
alpar@2	1286
Madarasi@23	1287	\begin{figure}
Madarasi@7	1288	\vspace*{-1.5cm}
Madarasi@7	1289	\hspace*{-1.5cm}
alpar@2	1290	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1291	\begin{center}
alpar@2	1292	\begin{tikzpicture}
Madarasi@28	1293	\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={$\|V_2\|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1294	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1295	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	1296	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	1297	format/1000 sep = \kern 0.08em}]
alpar@2	1298	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1299	\addplot table {randGraph/ind/vf2pInd35_0.05.txt};
alpar@3	1300	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1301	{randGraph/ind/vf2ppInd35_0.05.txt};
alpar@2	1302	\end{axis}
alpar@2	1303	\end{tikzpicture}
alpar@2	1304	\end{center}
alpar@2	1305	\end{subfigure}
alpar@2	1306	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1307	\begin{center}
alpar@2	1308	\begin{tikzpicture}
Madarasi@28	1309	\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={$\|V_2\|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1310	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1311	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	1312	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	1313	format/1000 sep = \kern 0.08em}]
alpar@2	1314	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1315	\addplot table {randGraph/ind/vf2pInd35_0.1.txt};
alpar@3	1316	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1317	{randGraph/ind/vf2ppInd35_0.1.txt};
alpar@2	1318	\end{axis}
alpar@2	1319	\end{tikzpicture}
alpar@2	1320	\end{center}
alpar@2	1321	\end{subfigure}
Madarasi@7	1322	\hspace*{-1.5cm}
alpar@2	1323	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1324	\begin{center}
alpar@2	1325	\begin{tikzpicture}
Madarasi@28	1326	\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={$\|V_2\|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1327	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1328	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	1329	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	1330	format/1000 sep = \kern 0.08em}]
alpar@2	1331	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1332	\addplot table {randGraph/ind/vf2pInd35_0.3.txt};
alpar@3	1333	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1334	{randGraph/ind/vf2ppInd35_0.3.txt};
alpar@2	1335	\end{axis}
alpar@2	1336	\end{tikzpicture}
alpar@2	1337	\end{center}
alpar@2	1338	\end{subfigure}
alpar@2	1339	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1340	\begin{center}
alpar@2	1341	\begin{tikzpicture}
Madarasi@28	1342	\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={$\|V_2\|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1343	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1344	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@28	1345	format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
Madarasi@28	1346	format/1000 sep = \kern 0.08em}]
alpar@2	1347	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1348	\addplot table {randGraph/ind/vf2pInd35_0.8.txt};
alpar@3	1349	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1350	{randGraph/ind/vf2ppInd35_0.8.txt};
alpar@2	1351	\end{axis}
alpar@2	1352	\end{tikzpicture}
Madarasi@23	1353	\end{center}
alpar@2	1354	\end{subfigure}
alpar@2	1355	\vspace*{-0.8cm}
Madarasi@28	1356	\caption{Induced subgraph isomorphism problem on random graphs having an average degree of
Madarasi@28	1357	35}\label{fig:randIND35}
alpar@2	1358	\end{figure}
alpar@2	1359
alpar@2	1360
alpar@29	1361	As the experiments above demonstrates, VF2++ is faster than VF2~Plus and able to
alpar@3	1362	handle really large graphs in milliseconds. Note that when $IND$ was
Madarasi@28	1363	considered and the graph to be embedded had proportionally few nodes ($\rho =
Madarasi@28	1364	0.05$, or $\rho = 0.1$), then VF2~Plus produced some inefficient node
alpar@4	1365	orders (e.g. see the $\delta=10$ case on
Madarasi@17	1366	Figure~\ref{fig:randIND10}). If these instances had been excluded, the
Madarasi@28	1367	charts would have looked similarly to the other ones.
alpar@3	1368	Unsurprisingly, as denser graphs are considered, both VF2++ and VF2
alpar@29	1369	Plus slow down slightly, but remain practically usable even on graphs
alpar@3	1370	having 10 000 nodes.
alpar@2	1371
alpar@2	1372
alpar@2	1373
alpar@2	1374
alpar@3	1375
alpar@2	1376	\section{Conclusion}
alpar@29	1377	This paper presented VF2++, a new graph matching algorithm based on VF2, and analyzed it from a practical point of view.
alpar@2	1378
alpar@3	1379	Recognizing the importance of the node order and determining an
alpar@3	1380	efficient one, VF2++ is able to match graphs of thousands of nodes in
alpar@3	1381	near practically linear time including preprocessing. In addition to
Madarasi@28	1382	the proper order, VF2++ uses more efficient cutting
alpar@29	1383	rules, which are easy to compute and make the algorithm able to prune
alpar@3	1384	most of the unfruitful branches without going astray.
alpar@2	1385
alpar@3	1386	In order to show the efficiency of the new method, it has been
Madarasi@28	1387	compared to VF2~Plus~\cite{VF2Plus}, which is the best contemporary algorithm.
alpar@2	1388
Madarasi@28	1389	The experiments show that VF2++ consistently outperforms VF2~Plus on
alpar@3	1390	biological graphs. It seems to be asymptotically faster on protein and
Madarasi@28	1391	on contact map graphs in the case of induced subgraph isomorphism problem,
Madarasi@28	1392	while in the case of graph isomorphism problem, it has definitely better
alpar@29	1393	asymptotic behavior on protein graphs.
alpar@2	1394
alpar@3	1395	Regarding random sparse graphs, not only has VF2++ proved itself to be
alpar@29	1396	faster than VF2~Plus, but it also has a practically linear behavior both
Madarasi@28	1397	in the case of induced subgraph and graph isomorphism problems.
alpar@2	1398
alpar@25	1399	%%%%%%%%%%%%%%%%
alpar@25	1400	\section*{Acknowledgement} \label{sec:ack}
alpar@25	1401	%%%%%%%%%%%%%%%%
alpar@25	1402	This research project was initiated and sponsored by QuantumBio
Madarasi@28	1403	Inc.~\cite{QUANTUMBIO}.
alpar@25	1404
alpar@25	1405	The authors were supported by the Hungarian Scientific Research Fund -
alpar@25	1406	OTKA, K109240 and by the J\'anos Bolyai Research Fellowship program of
alpar@25	1407	the Hungarian Academy of Sciences.
alpar@2	1408
alpar@0	1409
alpar@0	1410	%% The Appendices part is started with the command \appendix;
alpar@0	1411	%% appendix sections are then done as normal sections
alpar@0	1412	%% \appendix
alpar@0	1413
alpar@0	1414	%% \section{}
alpar@0	1415	%% \label{}
alpar@0	1416
alpar@0	1417	%% If you have bibdatabase file and want bibtex to generate the
alpar@0	1418	%% bibitems, please use
alpar@0	1419	%%
alpar@3	1420	\bibliographystyle{elsarticle-num} \bibliography{bibliography}
alpar@0	1421
alpar@0	1422	%% else use the following coding to input the bibitems directly in the
alpar@0	1423	%% TeX file.
alpar@0	1424
alpar@2	1425	%% \begin{thebibliography}{00}
alpar@0	1426
alpar@2	1427	%% %% \bibitem{label}
alpar@2	1428	%% %% Text of bibliographic item
alpar@0	1429
alpar@2	1430	%% \bibitem{}
alpar@0	1431
alpar@2	1432	%% \end{thebibliography}
alpar@2	1433
alpar@0	1434	\end{document}
alpar@0	1435	\endinput
alpar@0	1436	%%
alpar@0	1437	%% End of file `elsarticle-template-num.tex'.
alpar@29	1438
alpar@29	1439	%% LocalWords: Subgraph Isomorphism datasets subgraphs subgraph FPT
alpar@29	1440	%% LocalWords: isomorphism hypergraph Ullmann Nauty precompute BFS
alpar@29	1441	%% LocalWords: bijection isomorphisms undirected infeasible dataset
alpar@29	1442	%% LocalWords: preprocessing incrementing decrementing neighbours
alpar@29	1443	%% LocalWords: expectable bioinformatics

author	Alpar Juttner <alpar@cs.elte.hu>
	Fri, 22 Sep 2017 12:52:55 +0200
changeset 29	0ff72a828b16
parent 28	523fddfd7a01
permissions	-rw-r--r--