Translations of this page?:

This is an old revision of the document!


grapher.php
  1. #!/usr/bin/php
  2. <?php
  3. if(!defined('DOKU_INC')) define('DOKU_INC', realpath(dirname(__FILE__) . '/../') . '/');
  4. require_once(DOKU_INC . 'inc/init.php'); class Grapher extends DokuCLI {
  5.  
  /**
   * Register options and arguments on the given $options object
   *
   * Declares the help text, the four options (depth, media, format, output)
   * and the optional list of namespaces that should be graphed.
   *
   * @param DokuCLI_Options $options
   * @return void
   */
  protected function setup(DokuCLI_Options $options) {
      $options->setHelp('Creates a graph representation of pages and media files and how they are interlinked.');

      // long option name => array(help text, short flag, argument placeholder)
      $optionTable = array(
          'depth' => array(
              'Recursion depth, eg. how deep to look into the given namespaces. Use 0 for all. Default: 1',
              'd',
              'depth',
          ),
          'media' => array(
              "How to handle media files. 'ns' includes only media that is located in the given namespaces, "
              . "'all' includes all media files and 'none' ignores the media files completely. "
              . 'Default: ns',
              'm',
              'ns|all|none',
          ),
          'format' => array(
              "The wanted output format. 'dot' is a very simple format which can be used to visualize the resulting "
              . "graph with graphviz. The 'gexf' format is a more complex XML-based format which contains more info "
              . 'about the found nodes and can be loaded in Gephi. Default: dot',
              'f',
              'dot|gexf',
          ),
          'output' => array(
              'Where to store the output eg. a filename. If not given the output is written to STDOUT.',
              'o',
              'file',
          ),
      );

      // registration order follows the table order above
      foreach($optionTable as $longName => $spec) {
          list($helpText, $shortFlag, $placeholder) = $spec;
          $options->registerOption($longName, $helpText, $shortFlag, $placeholder);
      }

      // the namespaces argument is optional (third parameter false)
      $namespaceHelp = 'Give all wiki namespaces you want to have graphed. If no namespace is given, the root '
          . 'namespace is assumed.';
      $options->registerArgument('namespaces', $namespaceHelp, false);
  }
  41.  
  /**
   * Your main program
   *
   * Arguments and options have been parsed when this is run. Reads the
   * options, validates the media and format choices, opens the output
   * stream, gathers the graph data and writes it in the requested format.
   *
   * @param DokuCLI_Options $options
   * @return void
   */
  protected function main(DokuCLI_Options $options) {
      $depth = $options->getOpt('depth', 1);

      // media handling must be one of the documented modes
      $media = $options->getOpt('media', 'ns');
      $mediaModes = array('ns', 'all', 'none');
      if(!in_array($media, $mediaModes)) {
          $this->fatal('Bad media option: ' . $media);
      }

      // same for the output format
      $format = $options->getOpt('format', 'dot');
      $formats = array('dot', 'gexf');
      if(!in_array($format, $formats)) {
          $this->fatal('Bad format option: ' . $format);
      }

      // "-" (also the default) means standard output
      $output = $options->getOpt('output', '-');
      if($output == '-') {
          $output = 'php://stdout';
      }

      // no namespace given: start at the root namespace
      $namespaces = array_map('cleanID', $options->args);
      if(count($namespaces) === 0) {
          $namespaces = array('');
      }

      $fh = @fopen($output, 'w');
      if(!$fh) {
          $this->fatal('Failed to open ' . $output);
      }

      $data = $this->gather_data($namespaces, $depth, $media);
      switch($format) {
          case 'dot':
              $this->create_dot($data, $fh);
              break;
          case 'gexf':
              $this->create_gexf($data, $fh);
              break;
      }

      fclose($fh);
  }
  78.  
  /**
   * Find all the node and edge data for the given namespaces
   *
   * The result has two keys:
   *  - 'pages': page id => array with 'title', 'ns', 'size', 'time', 'links',
   *    'linklabel', 'media' and 'lang'
   *  - 'media': media id => array with 'title', 'size', 'ns' and 'time'
   *
   * 'links' and 'linklabel' share their keys: the label stored under key k
   * belongs to the link target stored under key k. Duplicated link targets
   * are removed from both lists (the first occurrence wins).
   *
   * @param array  $namespaces list of namespace ids, '' for the root namespace
   * @param int    $depth      recursion depth passed on to search()
   * @param string $incmedia   'ns', 'all' or 'none'
   * @return array
   */
  protected function gather_data($namespaces, $depth = 0, $incmedia = 'ns') {
      global $conf;
      /** @var helper_plugin_translation $transplugin */
      $transplugin = plugin_load('helper', 'translation');

      $pages = array();
      $media = array();
      foreach($namespaces as $ns) {
          // directory of the namespace relative to the data/media root
          $dir = str_replace(':', '/', $ns);

          // find media
          if($incmedia == 'ns') {
              $data = array();
              search(
                  $data,
                  $conf['mediadir'],
                  'search_universal',
                  array(
                      'depth' => $depth,
                      'listfiles' => true,
                      'listdirs' => false,
                      'pagesonly' => false,
                      'skipacl' => true,
                      'keeptxt' => true,
                      'meta' => true,
                  ),
                  $dir
              );

              // go through all those media files
              while($item = array_shift($data)) {
                  $media[$item['id']] = array(
                      'title' => noNS($item['id']),
                      'size' => $item['size'],
                      'ns' => getNS($item['id']),
                      'time' => $item['mtime'],
                  );
              }
          }

          // find pages
          $data = array();
          search(
              $data,
              $conf['datadir'],
              'search_universal',
              array(
                  'depth' => $depth,
                  'listfiles' => true,
                  'listdirs' => false,
                  'pagesonly' => true,
                  'skipacl' => true,
                  'firsthead' => true,
                  'meta' => true,
              ),
              $dir
          );

          // ns start page: a page named like the namespace itself is added by hand
          if($ns && page_exists($ns)) {
              $data[] = array(
                  'id' => $ns,
                  'ns' => getNS($ns),
                  'title' => p_get_first_heading($ns, false),
                  'size' => filesize(wikiFN($ns)),
                  'mtime' => filemtime(wikiFN($ns)),
                  'perm' => 16,
                  'type' => 'f',
                  'level' => 0,
                  'open' => 1,
              );
          }

          // go through all those pages
          while($item = array_shift($data)) {
              // prefer the creation date from the metadata, fall back to the file mtime
              $time = (int) p_get_metadata($item['id'], 'date created', false);
              if(!$time) $time = $item['mtime'];
              $lang = ($transplugin) ? $transplugin->getLangPart($item['id']) : '';

              // strip the leading language part from the namespace; the language
              // code is quoted so it can never be read as regex syntax
              if($lang) {
                  $item['ns'] = preg_replace('/^' . preg_quote($lang, '/') . '(:|$)/', '', $item['ns']);
              }

              $pages[$item['id']] = array(
                  'title' => $item['title'],
                  'ns' => $item['ns'],
                  'size' => $item['size'],
                  'time' => $time,
                  'links' => array(),
                  'linklabel' => array(),
                  'media' => array(),
                  'lang' => $lang
              );
          }
      }

      // now get links and media
      foreach($pages as $pid => $item) {
          // get instructions
          $ins = p_cached_instructions(wikiFN($pid), false, $pid);
          // find links and media usage; $i[0] is the instruction name, $i[1] its arguments
          foreach($ins as $i) {
              $mid = null;

              if($i[0] == 'internallink') {
                  $id = $i[1][0];
                  $label = $i[1][1];
                  $exists = true;
                  resolve_pageid($item['ns'], $id, $exists);
                  list($id) = explode('#', $id, 2);
                  if($id == $pid) continue; // skip self references
                  if($exists && isset($pages[$id])) {
                      // both lists are appended together, so their keys stay in sync
                      $pages[$pid]['links'][] = $id;
                      $pages[$pid]['linklabel'][] = $label;
                  }
                  if(is_array($i[1][1]) && $i[1][1]['type'] == 'internalmedia') {
                      $mid = $i[1][1]['src']; // image link
                  } else {
                      continue; // we're done here
                  }
              }

              if($i[0] == 'internalmedia') {
                  $mid = $i[1][0];
              }

              if(is_null($mid)) continue;
              if($incmedia == 'none') continue; // no media wanted

              $exists = true;
              resolve_mediaid($item['ns'], $mid, $exists);
              list($mid) = explode('#', $mid, 2);
              $mid = cleanID($mid);

              if($exists) {
                  if($incmedia == 'all') {
                      if(!isset($media[$mid])) { //add node
                          $media[$mid] = array(
                              'size' => filesize(mediaFN($mid)),
                              'time' => filemtime(mediaFN($mid)),
                              'ns' => getNS($mid),
                              'title' => noNS($mid),
                          );
                      }
                      $pages[$pid]['media'][] = $mid;
                  } elseif(isset($media[$mid])) {
                      $pages[$pid]['media'][] = $mid;
                  }
              }
          }

          // clean up duplicates; array_unique() keeps the key of the first
          // occurrence, so the labels are reduced to exactly those keys
          $pages[$pid]['links'] = array_unique($pages[$pid]['links']);
          $pages[$pid]['linklabel'] = array_intersect_key($pages[$pid]['linklabel'], $pages[$pid]['links']);
          $pages[$pid]['media'] = array_unique($pages[$pid]['media']);
      }

      return array('pages' => $pages, 'media' => $media);
  }
  241.  
  /**
   * Create a Graphviz dot representation
   *
   * Writes one note-shaped node per page and one edge per page link to $fh.
   * Media files are not emitted as nodes; the first media file of a page is
   * only referenced through the node's shapefile attribute.
   *
   * @param array    $data result of gather_data()
   * @param resource $fh   writable stream
   */
  protected function create_dot(&$data, $fh) {
      $pages =& $data['pages'];

      fwrite($fh, "digraph G {\n");

      // Graph wide settings. Lines starting with // are comments in the dot
      // output itself. Single quoted PHP strings are used so that the double
      // quotes belonging to dot do not need escaping.
      // NOTE(review): "rankype" is not a known Graphviz attribute name; it is
      // kept as found because the intended attribute cannot be told from here.
      $graphSettings = array(
          'concentrate=true;',
          '//compound=true;',
          '//baranksep=1.25;',
          'splines=ortho;',
          'splines=spline;',
          'labelURL="http://challengepower.info"',
          '//fontsize=36;',
          '//clusterMode=local;',
          '//size="12.93,7.28!";',
          'size="10,12!";',
          'ratio=compress;',
          'rankype=same;',
          'rankdir=TB;',
          'fixedsize=true;',
          'dpi=100;',
          'bgcolor="white";',
          'node [shape = "circle", fontsize=16, color=black, bgcolor=aquamarine];',
          'edge [arrowsize=2, color=black];',
      );
      fwrite($fh, implode("\n", $graphSettings) . "\n\n");

      // create all nodes first
      foreach($pages as $id => $page) {
          $attributes = array(
              'shape=note',
              'label="' . $this->dot_escape($page['title']) . '"',
          );

          // the node links to the first page it references, if any
          if(!empty($page['links'][0])) {
              $attributes[] = 'URL="/' . str_replace(':', '/', $page['links'][0]) . '"';
          } else {
              $attributes[] = 'URL="#"';
          }

          // the first media file is used as the node image
          if(!empty($page['media'][0])) {
              $parts = explode(':', $page['media'][0]);
              $file = array_pop($parts);
              // namespaces become directories; empty for media in the root namespace
              $folder = $parts ? implode('/', $parts) . '/' : '';
              $attributes[] = 'labelloc="b"';
              $attributes[] = 'shapefile="' . DOKU_INC . 'data/media/' . $folder . '300x300-' . $file . '"';
          }

          fwrite($fh, ' "page-' . $id . "\" [\n" . implode(",\n ", $attributes) . "\n ];\n");
      }

      // now create all the links
      foreach($pages as $id => $page) {
          foreach($page['links'] as $key => $link) {
              // the label of a link is stored under the same key as its target
              $label = isset($page['linklabel'][$key]) ? $page['linklabel'][$key] : '';
              // links without a title or with an image as title have no text label
              if(!is_string($label)) $label = '';
              fwrite(
                  $fh,
                  ' "page-' . $id . '" -> "page-' . $link . '" [color=navy;label="' .
                  $this->dot_escape($label) . "\"];\n"
              );
          }
      }
      fwrite($fh, "}\n");
  }

  /**
   * Escape a text for use inside a double quoted dot string
   *
   * @param mixed $text value to escape, cast to string first
   * @return string
   */
  private function dot_escape($text) {
      return str_replace(array('\\', '"'), array('\\\\', '\\"'), (string) $text);
  }
  318.  
  /**
   * Create a GEXF representation
   *
   * Writes the XML header, the node attribute declarations, one node per
   * page and media file and one edge per page link and media usage to $fh.
   *
   * @param array    $data result of gather_data()
   * @param resource $fh   writable stream
   */
  protected function create_gexf(&$data, $fh) {
      $pages =& $data['pages'];
      $media =& $data['media'];

      fwrite($fh, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
      fwrite(
          $fh,
          "<gexf xmlns=\"http://www.gexf.net/1.1draft\" version=\"1.1\"\n" .
          "xmlns:viz=\"http://www.gexf.net/1.1draft/viz\">\n"
      );
      fwrite($fh, " <meta lastmodifieddate=\"" . date('Y-m-d H:i:s') . "\">\n");
      fwrite($fh, " <creator>DokuWiki</creator>\n");
      fwrite($fh, " </meta>\n");
      fwrite($fh, " <graph mode=\"dynamic\" defaultedgetype=\"directed\">\n");

      // define attributes
      fwrite($fh, " <attributes class=\"node\">\n");
      fwrite($fh, " <attribute id=\"title\" title=\"Title\" type=\"string\" />\n");
      fwrite($fh, " <attribute id=\"lang\" title=\"Language\" type=\"string\" />\n");
      fwrite($fh, " <attribute id=\"ns\" title=\"Namespace\" type=\"string\" />\n");
      fwrite($fh, " <attribute id=\"type\" title=\"Type\" type=\"liststring\">\n");
      fwrite($fh, " <default>page|media</default>\n");
      fwrite($fh, " </attribute>\n");
      fwrite($fh, " <attribute id=\"time\" title=\"Created\" type=\"long\" />\n");
      fwrite($fh, " <attribute id=\"size\" title=\"File Size\" type=\"long\" />\n");
      fwrite($fh, " </attributes>\n");

      // create all nodes first: pages as light blue squares, media as sandy brown discs
      fwrite($fh, " <nodes>\n");
      foreach($pages as $id => $item) {
          $this->write_gexf_node($fh, 'page', $id, $item, 'square', array(173, 216, 230));
      }
      foreach($media as $id => $item) {
          $this->write_gexf_node($fh, 'media', $id, $item, 'disc', array(244, 164, 96));
      }
      fwrite($fh, " </nodes>\n");

      // now create all the edges
      fwrite($fh, " <edges>\n");
      $cnt = 0;
      foreach($pages as $id => $page) {
          $source = htmlspecialchars('page-' . $id);
          foreach($page['links'] as $link) {
              $cnt++;
              $target = htmlspecialchars('page-' . $link);
              fwrite($fh, " <edge id=\"$cnt\" source=\"$source\" target=\"$target\" />\n");
          }
          foreach($page['media'] as $link) {
              $cnt++;
              $target = htmlspecialchars('media-' . $link);
              fwrite($fh, " <edge id=\"$cnt\" source=\"$source\" target=\"$target\" />\n");
          }
      }
      fwrite($fh, " </edges>\n");

      fwrite($fh, " </graph>\n");
      fwrite($fh, "</gexf>\n");
  }

  /**
   * Write a single GEXF node element
   *
   * @param resource $fh    writable stream
   * @param string   $type  'page' or 'media', used as id prefix and type value
   * @param string   $id    page or media id
   * @param array    $item  node data; 'title', 'lang' and 'ns' may be missing
   * @param string   $shape value of the viz:shape element
   * @param array    $rgb   red, green and blue value of the viz:color element
   */
  private function write_gexf_node($fh, $type, $id, $item, $shape, $rgb) {
      // every text value ends up in an XML attribute, so escape all of them;
      // media items carry no 'lang' entry, hence the isset() guards
      $nodeId = htmlspecialchars($type . '-' . $id);
      $label = htmlspecialchars((string) $id);
      $title = htmlspecialchars(isset($item['title']) ? (string) $item['title'] : '');
      $lang = htmlspecialchars(isset($item['lang']) ? (string) $item['lang'] : '');
      $ns = htmlspecialchars(isset($item['ns']) ? (string) $item['ns'] : '');
      list($red, $green, $blue) = $rgb;

      fwrite($fh, " <node id=\"$nodeId\" label=\"$label\" start=\"{$item['time']}\">\n");
      fwrite($fh, " <attvalues>\n");
      fwrite($fh, " <attvalue for=\"type\" value=\"$type\" />\n");
      fwrite($fh, " <attvalue for=\"title\" value=\"$title\" />\n");
      fwrite($fh, " <attvalue for=\"lang\" value=\"$lang\" />\n");
      fwrite($fh, " <attvalue for=\"ns\" value=\"$ns\" />\n");
      fwrite($fh, " <attvalue for=\"time\" value=\"{$item['time']}\" />\n");
      fwrite($fh, " <attvalue for=\"size\" value=\"{$item['size']}\" />\n");
      fwrite($fh, " </attvalues>\n");
      fwrite($fh, " <viz:shape value=\"$shape\" />\n");
      fwrite($fh, " <viz:color r=\"$red\" g=\"$green\" b=\"$blue\" />\n");
      fwrite($fh, " </node>\n");
  }
  404.  
  405. }
  406.  
  // Script entry point: create the tool and start it. run() is not defined in
  // this file; presumably it is inherited from DokuCLI and ends up calling
  // setup() and main() - confirm against the DokuCLI class.
  407. $grapher = new Grapher();
  408. $grapher->run();
You could leave a comment if you were logged in.
  • dev/grapher-php.1601969348.txt.gz
  • Last modified: 2020/10/06 07:29
  • by bab