/*-
 *
 * Store on disk and search through a graph's shortest paths.
 *
 * Copyright (c) 2010, Diomidis Spinellis
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * To compile with GCC:
 * g++ -O -I /path_to_boost_include/ -L /path_to_boost_lib/ -l boost_thread -l boost_system -l boost_filesystem smap.cpp -o smap
 *
 * To compile with Microsoft Visual C/C++:
 * cl /Zi /EHsc /I path_to_boost smap.cpp -Fesmap.exe /link /LIBPATH:path_to_boost_lib
 *
 * $Id: smap.cpp,v 1.15 2010/09/05 08:15:58 dds Exp $
 *
 */

#include <string>
#include <iostream>
#include <queue>
#include <list>
#include <functional>

#include <boost/interprocess/managed_mapped_file.hpp>
#include <boost/interprocess/offset_ptr.hpp>
#include <boost/interprocess/allocators/allocator.hpp>
#include <boost/unordered_set.hpp>
#include <boost/interprocess/containers/string.hpp>
#include <boost/interprocess/containers/slist.hpp>

#include <boost/filesystem.hpp>

#include <boost/filesystem/operations.hpp>
#include <boost/progress.hpp>

using namespace boost::interprocess;

// Sufficient for the Wikipedia graph: 15M nodes
const std::size_t FileSize = 8l * 1024 * 1024 * 1024;
const std::size_t Elements = 20 * 1000 * 1000;

class Node;

//Typedefs of allocators and containers
typedef managed_mapped_file::segment_manager SegmentManager;

typedef allocator<void, SegmentManager> VoidAllocator;
typedef allocator<char, SegmentManager> CharAllocator;

typedef offset_ptr<Node> NodePtr;
typedef allocator<NodePtr, SegmentManager> NodePtrAllocator;

typedef slist<NodePtr, NodePtrAllocator> Edges;
typedef allocator<Edges, SegmentManager> EdgesAllocator;

typedef basic_string<char, std::char_traits<char>, CharAllocator> CharString;

// A graph node, suitable for performing a breadh-first search
class Node {
  public:
    typedef enum {White, Gray, Black} Color;

  private:
    CharString name;		// Node name
    Color color;		// Color used during BFS
    NodePtr predecessor;	// BFS predecessor
    Edges edges;		// Node's edges
  public:
    //Since VoidAllocator is convertible to any other allocator<T>, we can simplify
    //the initialization taking just one allocator for all inner containers.
    Node(const std::string &n, const VoidAllocator &voidAlloc)
      : name(n.begin(), n.end(), voidAlloc), color(White), predecessor(NULL), edges(voidAlloc) {
    }
    void addEdge(NodePtr p) {
      edges.push_front(p);
    }
    const CharString &getName() const {
      return name;
    }
    void setName(const std::string &n) {
      name = n.c_str();
    }
    const Edges &getEdges() const {
      return edges;
    }
    void setColor(Color c) {
      color = c;
    }
    Color getColor() const {
      return color;
    }
    NodePtr getPredecessor() const {
      return predecessor;
    }
    void setPredecessor(NodePtr p) {
      predecessor = p;
    }
    friend class NodeCompare;
    bool operator !=(const Node &b) const {
      return name != b.name;
    }
};

std::size_t hash_value(Node const& n) {
  boost::hash<CharString> hasher;
  return hasher(n.getName());
}

struct NodeEqual : std::binary_function<Node, Node, bool> {
  bool operator()(const Node &a, const Node &b) const {
    return a.getName() == b.getName();
  }
};

struct NodeCompare {
  bool operator() (const Node& lhs, const Node& rhs) const {
    return lhs.name < rhs.name;
  }
};

//Definition of the set holding Node entries
typedef allocator<Node, SegmentManager> NodeAllocator;
typedef boost::unordered_set<Node, boost::hash<Node>, NodeEqual, NodeAllocator> Nodes;
typedef Nodes::iterator NodesIter;

/*
 * Read ^A-separated nodes from the inputFile, storing the graph structure
 * in the specified backingFile.
 */
static void readData(const char *backingFile, const char *inputFile) {
  std::ifstream in(inputFile, std::ios::binary);

  if (in.fail()) {
    perror(inputFile);
    exit(1);
  }

  boost::filesystem::remove_all(backingFile);
  managed_mapped_file segment(create_only, backingFile, FileSize);

  //An allocator convertible to any allocator<T, SegmentManager> type
  VoidAllocator allocInst (segment.get_segment_manager());

  //Construct the memory map and fill it
  Nodes *entries = segment.construct<Nodes>("entries")(Elements, boost::hash<Node>(), NodeEqual(), allocInst);

  std::string line;
  // Reuse instead of constructing new
  Node n(std::string(), allocInst);

  // Progress indicator
  boost::progress_display showProgress(boost::filesystem::file_size(inputFile));
  std::streampos pos = in.tellg();

  // Loop through all lines, adding them to the graph
  while (std::getline(in, line)) {
    int split = line.find('\001');
    if (split == std::string::npos) {
      std::cerr << "No separator: " << line << std::endl;
      continue;
    }
    n.setName(line.substr(0, split));
    NodesIter from(entries->insert(n).first);
    n.setName(line.substr(split + 1));
    NodesIter to(entries->insert(n).first);
    (const_cast<Node &>(*from)).addEdge(const_cast<Node *>(&*to));
    // Update progress indicator
    showProgress += in.tellg() - pos;
    pos = in.tellg();
  }
}

/*
 * Breadth first search from node from one node to another in a graph
 * of n nodes.
 * Return true if a path is found, false otherwise.
 */
static bool breadthFirstSearchFor(NodePtr from, NodePtr to, size_t n) {
  std::queue<NodePtr> q;

  boost::progress_display progress(n);
  from->setColor(Node::Gray);
  q.push(from);
  while (!q.empty()) {
    NodePtr u = q.front();
    q.pop();
    const Edges edges = u->getEdges();
    for (Edges::const_iterator j = edges.begin(); j != edges.end(); j++)
      if ((*j)->getColor() == Node::White) {
        (*j)->setColor(Node::Gray);
        (*j)->setPredecessor(u);
	++progress;
        if (*j == to)
          return true;  // Found
        q.push(*j);
      }
    u->setColor(Node::Black);
  }
  return false;	// Not found
}

/*
 * Find a node with the specified name and return a pointer to it.
 * Exit the program with an error if no such node can be found.
 */
static Node *findNode(const Nodes *entries, const Node &n) {
  Nodes::const_iterator i = entries->find(n);
  if (i == entries->end()) {
    std::cerr << "Node [" << n.getName() << "] not found" << std::endl;
    exit(1);
  }
  return const_cast<Node *>(&*i);
}

/*
 * Search and report the shortest graph path from "from" to "to"
 * The graph is stored in backingFile.
 */
static void searchData(const char *backingFile, const std::string &from, const std::string &to) {
  managed_mapped_file segment(open_copy_on_write, backingFile);

  //An allocator convertible to any allocator<T, SegmentManager> type
  VoidAllocator allocInst(segment.get_segment_manager());

  // Obtain the previously saved entries
  Nodes *entries = segment.find<Nodes>("entries").first;

  NodePtr toPtr;
  bool found = breadthFirstSearchFor(findNode(entries, Node(from, allocInst)),
      toPtr = findNode(entries, Node(to, allocInst)), entries->size());
  std::cout << std::endl; // End progress indicator

  if (!found) {
    std::cerr << "No path found" << std::endl;
    return;
  }

  // Report path in the right order
  std::list <std::string> result;
  for (NodePtr p = toPtr; p != NULL; p  = p->getPredecessor())
    result.push_front(p->getName().c_str());
  for (std::list <std::string>::const_iterator i = result.begin(); i != result.end(); i++)
      std::cout << *i << "\n";
}

// Report program usage and exit
static void usage() {
  std::cerr << "Usage: smap " << " -s backingFile word1 word2 | -r backingFile inputFile" << std::endl;
  exit(1);
}

int main(int argc, char *argv[]) {
  if (argc < 4 || *argv[1] != '-')
    usage();

  switch (argv[1][1]) {
  case 'r':
    readData(argv[2], argv[3]);
    break;
  case 's':
    if (argc != 5)
      usage();
    searchData(argv[2], argv[3], argv[4]);
    break;
  default:
    usage();
  }
}

// vim: shiftwidth=2 smarttab
