- grapher.php
- #!/usr/bin/php
- <?php
- require_once(DOKU_INC . 'inc/init.php');
- class Grapher extends DokuCLI {
/**
 * Register options and arguments on the given $options object
 *
 * Declares the help text, the 'depth', 'media', 'format' and 'output'
 * options and the 'namespaces' argument. The 'format' help text describes
 * all three values the option advertises (dot, gexf and json).
 *
 * @param DokuCLI_Options $options
 * @return void
 */
protected function setup(DokuCLI_Options $options) {
    $options->setHelp('Creates a graph representation of pages and media files and how they are interlinked.');
    $options->registerOption(
        'depth',
        'Recursion depth, eg. how deep to look into the given namespaces. Use 0 for all. Default: 1',
        'd', 'depth');
    $options->registerOption(
        'media',
        "How to handle media files. 'ns' includes only media that is located in the given namespaces, ".
        "'all' includes all media files and 'none' ignores the media files completely. ".
        "Default: ns",
        'm', 'ns|all|none');
    $options->registerOption(
        'format',
        "The wanted output format. 'dot' is a very simple format which can be used to visualize the resulting ".
        "graph with graphviz. The 'gexf' format is a more complex XML-based format which contains more info ".
        "about the found nodes and can be loaded in Gephi. The 'json' format emits the pages and their links ".
        "as a JSON object with 'nodes' and 'links' lists. Default: dot",
        'f', 'dot|gexf|json');
    $options->registerOption(
        'output',
        "Where to store the output eg. a filename. If not given the output is written to STDOUT.",
        'o', 'file');
    $options->registerArgument(
        'namespaces',
        "Give all wiki namespaces you want to have graphed. If no namespace is given, the root ".
        "namespace is assumed.",
        false
    );
}
- /**
- * Main program: validate the options, gather the graph data and write it out
- *
- * Arguments and options have been parsed when this is run. Invalid 'media'
- * or 'format' values and an output that could not be opened are reported
- * through $this->fatal(). The result of gather_data() is then handed to
- * create_dot(), create_gexf() or create_json() depending on $format; the
- * if/elseif chain has no else branch, so any other value writes nothing.
- *
- * NOTE(review): this copy of the method is incomplete. The statements that
- * assign $depth, $media, $format, $output, $namespaces and $fh, and the
- * conditions guarding the two fatal() calls, are not present (the braces
- * below do not balance). Restore them from the original file before use.
- *
- * @param DokuCLI_Options $options
- * @return void
- */
- protected function main(DokuCLI_Options $options) {
- $this->fatal('Bad media option: ' . $media);
- }
- $this->fatal('Bad format option: ' . $format);
- }
- if($output == '-') $output = 'php://stdout'; // '-' is mapped to the STDOUT stream
- if(!$fh) $this->fatal("Failed to open $output");
- $data = $this->gather_data($namespaces, $depth, $media);
- if($format == 'dot') {
- $this->create_dot($data, $fh);
- } elseif($format == 'gexf') {
- $this->create_gexf($data, $fh);
- } elseif($format == 'json') {
- $this->create_json($data, $fh);
- }
- }
- /**
- * Find all the node and edge data for the given namespaces
- *
- * For every namespace the media directory ($conf['mediadir'], only when
- * $incmedia is 'ns') and the page directory ($conf['datadir']) are scanned
- * with search() using the 'search_universal' callback. A page named like
- * the namespace itself is added when page_exists() reports it. Each page
- * gets its title, namespace, size, a time (the 'date created' metadata,
- * falling back to the file mtime) and a language part taken from the
- * translation helper plugin when that plugin could be loaded.
- *
- * In a second pass the cached parser instructions of every page are read:
- * 'internallink' targets are stored in 'links' with their labels in the
- * parallel 'linklabel' list (self references are skipped), and
- * 'internalmedia' references are stored in 'media' unless $incmedia is
- * 'none'.
- *
- * NOTE(review): this copy of the method is incomplete. The array( openers,
- * the loops that bind $item, the initialisation of $pages, $media and $data,
- * the duplicate clean-up and the return statement are not present, and the
- * braces do not balance. Restore them from the original file before use.
- *
- * NOTE(review): $i[1][1] is stored as the link label but is also read as an
- * array ('src') for image links further down, so a label is not guaranteed
- * to be a plain string - confirm before printing it.
- *
- * @param $namespaces list of namespaces, iterated with foreach
- * @param int $depth passed as the 'depth' option of both search() calls
- * @param string $incmedia media handling; compared against 'ns', 'all' and 'none'
- * @return array presumably ['pages' => ..., 'media' => ...] as read by the
- *               create_* methods - the return statement is missing here
- */
- protected function gather_data($namespaces, $depth = 0, $incmedia = 'ns') {
- global $conf;
- /** @var helper_plugin_translation $transplugin */
- $transplugin = plugin_load('helper', 'translation');
- foreach($namespaces as $ns) {
- // find media files, but only when media is restricted to the given namespaces
- if($incmedia == 'ns') {
- search(
- $data,
- $conf['mediadir'],
- 'search_universal',
- 'depth' => $depth,
- 'listfiles' => true,
- 'listdirs' => false,
- 'pagesonly' => false,
- 'skipacl' => true,
- 'keeptxt' => true,
- 'meta' => true,
- ),
- );
- // go through all those media files and keep title, size, namespace and mtime
- 'title' => noNS($item['id']),
- 'size' => $item['size'],
- 'ns' => getNS($item['id']),
- 'time' => $item['mtime'],
- );
- }
- }
- // find pages below the data directory
- search(
- $data,
- $conf['datadir'],
- 'search_universal',
- 'depth' => $depth,
- 'listfiles' => true,
- 'listdirs' => false,
- 'pagesonly' => true,
- 'skipacl' => true,
- 'firsthead' => true,
- 'meta' => true,
- ),
- );
- // ns start page: a page with the same id as the namespace is added by hand
- if($ns && page_exists($ns)) {
- 'id' => $ns,
- 'ns' => getNS($ns),
- 'title' => p_get_first_heading($ns, false),
- 'perm' => 16,
- 'type' => 'f',
- 'level' => 0,
- 'open' => 1,
- );
- }
- // go through all those pages; prefer the creation date over the file mtime
- $time = (int) p_get_metadata($item['id'], 'date created', false);
- if(!$time) $time = $item['mtime'];
- $lang = ($transplugin) ? $transplugin->getLangPart($item['id']) : '';
- 'title' => $item['title'],
- 'ns' => $item['ns'],
- 'size' => $item['size'],
- 'time' => $time,
- 'lang' => $lang
- );
- }
- }
- // now get links and media used on each collected page
- foreach($pages as $pid => $item) {
- // get the cached parser instructions of the page
- $ins = p_cached_instructions(wikiFN($pid), false, $pid);
- // find links and media usage
- foreach($ins as $i) {
- $mid = null;
- if($i[0] == 'internallink') {
- $id = $i[1][0];
- $label = $i[1][1];
- $exists = true;
- resolve_pageid($item['ns'], $id, $exists);
- if($id == $pid) continue; // skip self references
- $pages[$pid]['links'][] = $id;
- $pages[$pid]['linklabel'][] = $label;
- }
- $mid = $i[1][1]['src']; // image link: the link label is itself a media reference
- } else {
- continue; // we're done here
- }
- }
- if($i[0] == 'internalmedia') {
- $mid = $i[1][0];
- }
- if($incmedia == 'none') continue; // no media wanted
- $exists = true;
- resolve_mediaid($item['ns'], $mid, $exists);
- $mid = cleanID($mid);
- if($exists) {
- if($incmedia == 'all') {
- 'ns' => getNS($mid),
- 'title' => noNS($mid),
- );
- }
- $pages[$pid]['media'][] = $mid;
- $pages[$pid]['media'][] = $mid;
- }
- }
- }
- // clean up duplicates (the statements doing so are missing from this copy)
- }
- }
- /**
- * Create a Graphviz dot representation
- *
- * Writes to $fh with fwrite(): a graph label line, a block of hard-coded
- * graph settings (node and edge defaults, a root node and the
- * 'corporations', 'leaks' and 'countries' subgraphs listing fixed page ids),
- * one node per entry of $data['pages'] labelled with the page title, and
- * one edge per link labelled with the matching 'linklabel' entry. For some
- * nodes a 'shapefile' attribute is appended that points at a file prefixed
- * with '300x300-' below DOKU_INC . 'data/media/'. The loop that would emit
- * media nodes is commented out.
- *
- * NOTE(review): this copy of the method is incomplete. The fwrite( opener
- * of the settings block and of the node line, the loops and conditions
- * around the shapefile and edge output, and the assignments of $path, $file
- * and $n are not present. Restore them from the original file before use.
- *
- * @param array $data read by reference; uses the 'pages' and 'media' keys
- * @param resource $fh handle the dot source is written to
- */
- protected function create_dot(&$data, $fh) {
- $pages =& $data['pages'];
- $media =& $data['media'];
- fwrite($fh, "labelloc=\"b\";\nlabel=\"Actors of a Persecution - https:\/\/challengepower.info - #FreeAssange\";");
- node [shape = \"circle\", fontsize=16, color=black, bgcolor=aquamarine];
- edge [arrowsize=2, color=black];
- root=\"page-the_actors:assange\";
- subgraph corporations {
- \"page-the_actors:corporations:darktrace\" ,
- \"page-the_actors:corporations:thales\" ,
- \"page-the_actors:corporations:sc_strategy_limited\",
- \"page-the_actors:corporations:startfor\",
- \"page-the_actors:corporations:uc_global_s.l\"
- }
- subgraph leaks {
- \"page-the_actors:leaks:global_intelligences_files\" ,
- \"page-the_actors:leaks:spy_files\",
- \"page-the_actors:leaks:vault7\"
- }
- subgraph countries {
- \"page-the_actors:ukgov:start\",
- \"page-the_actors:usgov:start\",
- \"page-the_actors:sweden:start\",
- \"page-the_actors:ecuador:start\"
- }
- \n");
- // create all nodes first, one per page
- foreach($pages as $id => $page) {
- shape=note,
- label=\"{$page['title']}\",\n");
- // look for links (the statements doing so are missing from this copy)
- // look for media: rebuild the folder part of a media path and reference its 300x300 thumbnail
- {
- $folder="";
- for($i=0 ; $i < $file-1 ; $i++)
- {
- $folder = $folder . $path[$i] . "/";
- }
- fwrite($fh,",\n labelloc=\"b\",\n shapefile=\"".DOKU_INC. "data/media/" . $folder . "300x300-" . $path[$file-1] . "\"");
- }
- }
- // var_dump($pages);
- /*
- foreach($media as $id => $item) {
- fwrite($fh, " \"media-$id\" [shape=box, label=\"$id\", color=sandybrown, fontname=Helvetica];\n");
- }
- */
- // now create all the links as labelled edges between page nodes
- foreach($pages as $id => $page) {
- {
- fwrite($fh, " \"page-". $id ."\" -> \"page-". $page['links'][$n] ."\" [color=navy;label=\"". $page['linklabel'][$n] ."\"];\n");
- }
- }
- }
- /**
- * Create a GEXF representation
- *
- * The visible fragments open a <gexf> root element using the 1.1draft
- * namespaces, iterate over the pages and the media items, and then walk the
- * 'links' and 'media' lists of every page while counting edges in $cnt.
- *
- * NOTE(review): this copy of the method is incomplete. The fwrite( opener
- * of the root element and every statement that writes attributes, nodes and
- * edges are not present, so the loops below have empty bodies. Restore them
- * from the original file before use.
- *
- * @param array $data read by reference; uses the 'pages' and 'media' keys
- * @param resource $fh handle the GEXF document is meant to be written to
- */
- protected function create_gexf(&$data, $fh) {
- $pages =& $data['pages'];
- $media =& $data['media'];
- $fh, "<gexf xmlns=\"http://www.gexf.net/1.1draft\" version=\"1.1\"
- xmlns:viz=\"http://www.gexf.net/1.1draft/viz\">\n"
- );
- // define attributes (statements missing from this copy)
- // create all nodes first (loop bodies missing from this copy)
- foreach($pages as $id => $item) {
- }
- foreach($media as $id => $item) {
- }
- // now create all the edges; $cnt counts every page link and media reference
- $cnt = 0;
- foreach($pages as $id => $page) {
- foreach($page['links'] as $link) {
- $cnt++;
- }
- foreach($page['media'] as $link) {
- $cnt++;
- }
- }
- }
- /* Create a JSON representation
- *
- * Builds a string in $output holding an object with a "root" entry, a
- * "nodes" list (id, name and value per page, plus an "img" entry pointing at
- * a '300x300-' prefixed file below DOKU_INC . 'data/media/') and a "links"
- * list (source, target and value per page link).
- *
- * $data is read by reference and its 'pages' and 'media' keys are used; $fh
- * is the output handle passed in by main().
- *
- * NOTE(review): this copy of the method is incomplete. $root_name, $title,
- * $path and $file are never assigned in the visible lines, the condition
- * around the "img" block is not present, and nothing visible writes $output
- * to $fh. Restore those statements from the original file before use.
- *
- * NOTE(review): $cnt is a single counter running across all pages, yet it
- * indexes the per-page $page["linklabel"] list, so from the second page with
- * links onward the index no longer matches the link - confirm and use a
- * per-page index instead.
- *
- * NOTE(review): values are concatenated into the JSON text without escaping
- * and the visible lines put no separator between node objects - confirm the
- * output parses.
- */
- protected function create_json(&$data, $fh) {
- $pages =& $data['pages'];
- $media =& $data['media'];
- $output = '{
- "root": "'. $root_name .'",
- "nodes": ['."\n" ;
- $i=0;
- // create all nodes first, one JSON object per page
- foreach($pages as $id => $item){
- $i++;
- $output .= "{\n";
- $output .= '"id" : "'. $id .'",'."\n";
- $output .= '"name" : "'. $title .'",'."\n";
- $output .= '"value" : "'. $item['size'] .'",'."\n";
- {
- $folder="";
- for($n=0 ; $n < $file-1 ; $n++)
- {
- $folder = $folder . $path[$n] . "/";
- }
- $output .= ',"img" : "'. DOKU_INC. "data/media/" . $folder . "300x300-" . $path[$file-1] .'"'."\n";
- }
- // $output .= '"has_conflict" : "'.$id.'"'."\n";
- // $output .= '"description" : "'.$id.'"'."\n";
- $output .= "}\n";
- }
- // now create all the edges, comma separated from the second one onward
- $output .= '],
- "links": ['."\n";
- $cnt = 0;
- foreach($pages as $id => $page){
- foreach($page['links'] as $link){
- if ($cnt > 0) $output .= ',';
- $output .= '{
- "source": "'. $id . '",
- "target": "'. $link . '",
- "value": "'. $page["linklabel"][$cnt] .'"
- }';
- $cnt++;
- }
- }
- $output .= " ] }";
- }
- }
- $grapher = new Grapher(); // script entry point: instantiate the CLI tool defined above
- $grapher->run(); // run() is not defined in this file; presumably inherited from DokuCLI - confirm
You could leave a comment if you were logged in.