Line data Source code
1 : // Copyright (c) 2013-2022 Made to Order Software Corp. All Rights Reserved
2 : //
3 : // https://snapwebsites.org/project/eventdispatcher
4 : // contact@m2osw.com
5 : //
6 : // This program is free software; you can redistribute it and/or modify
7 : // it under the terms of the GNU General Public License as published by
8 : // the Free Software Foundation; either version 2 of the License, or
9 : // (at your option) any later version.
10 : //
11 : // This program is distributed in the hope that it will be useful,
12 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : // GNU General Public License for more details.
15 : //
16 : // You should have received a copy of the GNU General Public License along
17 : // with this program; if not, write to the Free Software Foundation, Inc.,
18 : // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 :
20 : /** \file
21 : * \brief Implementation of the process_info class.
22 : *
23 : * This file includes code to read data from the `/proc/<pid>/...` folder.
24 : * It parses the data and then saves it in various fields which you can
25 : * retrieve using the various functions available in the process_info
26 : * class.
27 : *
28 : * The data is read when necessary and, in many cases, cached. In other
29 : * words, doing a get again may return data that was cached as opposed to
30 : * data that was just read.
31 : *
32 : * See the proc(5) manual page for details about all the fields found in
33 : * the files under `/proc/<pid>/...`.
34 : *
35 : * \sa https://man7.org/linux/man-pages/man5/proc.5.html
36 : */
37 :
38 : // self
39 : //
40 : #include <cppprocess/process_info.h>
41 :
42 :
43 : // snaplogger lib
44 : //
45 : #include <snaplogger/message.h>
46 :
47 :
48 : // snapdev lib
49 : //
50 : #include <snapdev/file_contents.h>
51 : #include <snapdev/tokenize_string.h>
52 : #include <snapdev/pathinfo.h>
53 :
54 :
55 : // C++ lib
56 : //
57 : #include <iostream>
58 :
59 :
60 : // C lib
61 : //
62 : //#include <proc/readproc.h>
63 : //#include <stdio.h>
64 : //#include <sys/prctl.h>
65 : //#include <sys/wait.h>
66 : //#include <unistd.h>
67 :
68 :
69 : // last include
70 : //
71 : #include <snapdev/poison.h>
72 :
73 :
74 :
75 : namespace cppprocess
76 : {
77 :
78 :
79 :
80 : /** \brief Initialize a process_info object.
81 : *
82 : * This function saves the pid_t of the process. The other functions are
83 : * then able to retrieve process details such as the name, argument list,
84 : * processing times, etc.
85 : *
86 : * Most of the data gets cached, you can also reset a process_info object
87 : * to retrieve the data anew from the corresponding `/proc/<pid>/...` file.
88 : *
89 : * \param[in] pid The pid of the process you're interested in.
90 : */
91 1 : process_info::process_info(pid_t pid)
92 1 : : f_pid(pid)
93 : {
94 1 : }
95 :
96 :
97 : /** \brief Get the process identifier.
98 : *
99 : * This function retrieves the process identifier of this process_info
100 : * object.
101 : *
102 : * This function actually verifies that the process is still alive. If not,
103 : * then it returns -1. Once this function returns -1 once, it will then
104 : * always return -1.
105 : *
106 : * \return The process identifier.
107 : */
108 4 : pid_t process_info::get_pid()
109 : {
110 4 : if(f_pid != -1)
111 : {
112 8 : std::string const filename("/proc/" + std::to_string(f_pid));
113 4 : struct stat st;
114 4 : if(stat(filename.c_str(), &st) != 0)
115 : {
116 0 : f_pid = -1;
117 : }
118 4 : else if(!S_ISDIR(st.st_mode))
119 : {
120 0 : f_pid = -1;
121 : }
122 : }
123 :
124 4 : return f_pid;
125 : }
126 :
127 :
128 : /** \brief Get the parent process identifier.
129 : *
130 : * This function retrieves the parent process identifier of this
131 : * proc_info object.
132 : *
133 : * \return The parent process identifier.
134 : */
135 1 : pid_t process_info::get_ppid()
136 : {
137 1 : load_stat();
138 :
139 1 : return f_ppid;
140 : }
141 :
142 :
143 : /** \brief Get the process main group identifier.
144 : *
145 : * This function returns the process main group identifier. At first this
146 : * is the same as the main group of the user that started the process
147 : * although the process can change that parameter (if given the right to
148 : * do so).
149 : *
150 : * \return The group the process is a part of.
151 : */
152 1 : pid_t process_info::get_pgid()
153 : {
154 1 : load_stat();
155 :
156 1 : return f_pgid;
157 : }
158 :
159 :
160 : /** \brief Get the number of minor & major page faults.
161 : *
162 : * This function retrieves the minor & major number of page faults.
163 : *
164 : * \param[out] major The major page fault since last update.
165 : * \param[out] minor The minor page fault since last update.
166 : *
167 : * \return The process page fault statistics.
168 : */
169 1 : void process_info::get_page_faults(std::uint64_t & major, std::uint64_t & minor)
170 : {
171 1 : load_stat();
172 :
173 1 : minor = f_minor_faults;
174 1 : major = f_major_faults;
175 1 : }
176 :
177 :
178 : // Where did libproc get that from?! (i.e. you'd need to read all the
179 : // process to compute this, I think)
180 : ///** \brief Get the immediate percent of CPU usage for this process.
181 : // *
182 : // * This function retrieves the CPU usage as a percent of total CPU
183 : // * available.
184 : // *
185 : // * \return The immediate CPU usage as a percent.
186 : // */
187 : //unsigned process_info::get_pcpu() const -- TBD
188 : //{
189 : // return f_pcpu;
190 : //}
191 :
192 :
193 : /** \brief Get the immediate process status.
194 : *
195 : * This function retrieves the CPU status of the process.
196 : *
197 : * The status is one of the following:
198 : *
199 : * \li D -- uninterruptible sleep (usually I/O)
200 : * \li R -- running or runnable
201 : * \li S -- Sleeping
202 : * \li T -- stopped by a job control signal or trace
203 : * \li W -- paging (should not occur)
204 : * \li X -- dead (should never appear)
205 : * \li Z -- defunct zombie process
206 : *
207 : * See `man 5 proc` for more details (versions when such and such flag
208 : * as defined).
209 : *
210 : * \warning
211 : * If you set the \p force parameter to false, then the status is not
212 : * updated. In most cases, you want to first call this function to
213 : * update the `stat` data and then read the other data which will be
214 : * what was read at the same time as this status.
215 : *
216 : * \param[in] force Whether to force a reload of the `stat` file.
217 : *
218 : * \return The status of the process.
219 : */
220 1 : process_state_t process_info::get_state(bool force)
221 : {
222 1 : load_stat(force);
223 :
224 1 : return f_state;
225 : }
226 :
227 :
228 : /** \brief Get the percent usage of CPU by this process.
229 : *
230 : * \todo
231 : * Implement. At this time, this is not done because it requires reading
232 : * information about all the threads and compute the percent which is
233 : * not that simple. Also, right now we do not really need this info.
234 : *
235 : * \return Always -1 (until it gets implemented).
236 : */
237 0 : int process_info::get_cpu_percent()
238 : {
239 0 : return -1;
240 : }
241 :
242 :
243 : /** \brief Get the amount of time spent by this process.
244 : *
245 : * This function gives you information about the four variables
246 : * available cummulating the amount of time the process spent
247 : * running so far.
248 : *
249 : * \param[out] utime The accumulated user time of this very task.
250 : * \param[out] stime The accumulated kernel time of this very task.
251 : * \param[out] cutime The accumulated user time of this task and
252 : * its children.
253 : * \param[out] cstime The accumulated kernel time of this task
254 : * and its children.
255 : */
256 1 : void process_info::get_times(
257 : unsigned long long & utime
258 : , unsigned long long & stime
259 : , unsigned long long & cutime
260 : , unsigned long long & cstime)
261 : {
262 1 : load_stat();
263 :
264 1 : utime = f_user_time;
265 1 : stime = f_system_time;
266 1 : cutime = f_children_user_time;
267 1 : cstime = f_children_system_time;
268 1 : }
269 :
270 :
271 : /** \brief Get the real time priority of this process.
272 : *
273 : * This function returns the real time priority of the process.
274 : *
275 : * \return The process real time priority.
276 : */
277 0 : int process_info::get_priority()
278 : {
279 0 : load_stat();
280 :
281 0 : return f_priority;
282 : }
283 :
284 :
285 : /** \brief Get the Unix nice of this process.
286 : *
287 : * This function returns the Unix nice of the process.
288 : *
289 : * \return The process unix nice.
290 : */
291 1 : int process_info::get_nice()
292 : {
293 1 : load_stat();
294 :
295 1 : return f_nice;
296 : }
297 :
298 :
299 : /** \brief Get the size of this process.
300 : *
301 : * This function returns the total size of the process defined as
302 : * the virtual memory size.
303 : *
304 : * \todo
305 : * Look at loading the memory info as in `/proc/<pid>/status`.
306 : *
307 : * \return The process total virtual size.
308 : */
309 1 : std::uint64_t process_info::get_total_size()
310 : {
311 1 : load_stat();
312 :
313 1 : return f_rss
314 1 : + f_end_code - f_start_code
315 1 : + f_end_data - f_start_data;
316 : }
317 :
318 :
319 : /** \brief Get the RSS size of this process.
320 : *
321 : * This function returns the RSS size of the process defined as
322 : * the virtual memory size.
323 : *
324 : * \return The process RSS virtual size.
325 : */
326 1 : std::uint64_t process_info::get_rss_size()
327 : {
328 1 : load_stat();
329 :
330 1 : return f_rss;
331 : }
332 :
333 :
334 : /** \brief Get the process name.
335 : *
336 : * This function return the name as found in the `comm` file. This name
337 : * is usually the first 15 letters of the command name. This name can
338 : * be changed so it may differ from the one found in the `cmdline`.
339 : * We most often change our thread names to reflect what they are
340 : * used for.
341 : *
342 : * This is similar to the basename, however, the basename will be
343 : * "calculate" from the `cmdline` file instead.
344 : *
345 : * \note
346 : * The same name is found in the `stat` file (between parenthesis) so
347 : * we read it from there and not the `comm` file.
348 : *
349 : * \return The process name.
350 : */
351 1 : std::string process_info::get_name()
352 : {
353 1 : load_stat();
354 :
355 1 : return f_name;
356 : }
357 :
358 :
359 : /** \brief Get the process command.
360 : *
361 : * This function returns the command path and name as defined on the command
362 : * line.
363 : *
364 : * If something goes wrong (i.e. the process dies) then the function returns
365 : * an empty string.
366 : *
367 : * \return The command name and path.
368 : */
369 2 : std::string process_info::get_command()
370 : {
371 2 : load_cmdline();
372 :
373 2 : if(f_args.empty())
374 : {
375 0 : return std::string();
376 : }
377 :
378 2 : return f_args[0];
379 : }
380 :
381 :
382 : /** \brief Get the process (command) basename.
383 : *
384 : * By default, the process name is the full name used on the command line
385 : * to start this process. If that was a full path, then the full pass is
386 : * included in the process name.
387 : *
388 : * This function returns the basename only.
389 : *
390 : * \return The process basename.
391 : */
392 1 : std::string process_info::get_basename()
393 : {
394 1 : return snapdev::pathinfo::basename(get_command());
395 : }
396 :
397 :
398 : /** \brief Get the number of arguments defined on the command line.
399 : *
400 : * This function counts the number of arguments, including any empty
401 : * arguments.
402 : *
403 : * Count will be positive or null. The count does include the command
404 : * line (program name with index 0). This is why this function is
405 : * expected to return 1 or more. However, if the call happens after
406 : * the process died, then you will get zero.
407 : *
408 : * \return Count the number of arguments.
409 : *
410 : * \sa get_arg()
411 : */
412 10 : std::size_t process_info::get_args_size()
413 : {
414 10 : load_cmdline();
415 :
416 10 : return f_args.size();
417 : }
418 :
419 :
420 : /** \brief Get the argument at the specified index.
421 : *
422 : * This function returns one of the arguments of the command line of
423 : * this process. Note that it happens that arguments are empty strings.
424 : * The very first argument (index of 0) is the command full name, just
425 : * like in `argv[]`.
426 : *
427 : * \note
428 : * If the index is out of bounds, the function returns an empty string.
429 : * To know the number of available arguments, use the get_args_size()
430 : * function first.
431 : *
432 : * \param[in] index The index of the argument to retrieve.
433 : *
434 : * \return The specified argument or an empty string.
435 : *
436 : * \sa get_args_size()
437 : */
438 9 : std::string process_info::get_arg(int index)
439 : {
440 9 : if(static_cast<std::size_t>(index) >= get_args_size())
441 : {
442 0 : return std::string();
443 : }
444 :
445 9 : return f_args[index];
446 : }
447 :
448 :
449 : /** \brief Get the controlling terminal major/minor of this process.
450 : *
451 : * This function returns the TTY device major and minor numbers.
452 : *
453 : * This is the controlling terminal. It may return zeroes in which case
454 : * there is no controlling terminal attached to that process.
455 : *
456 : * \param[out] major The major device number of the controlling terminal.
457 : * \param[out] minor The minor device number of the controlling terminal.
458 : */
459 1 : void process_info::get_tty(int & major, int & minor)
460 : {
461 1 : load_stat();
462 :
463 1 : major = f_tty_major;
464 1 : minor = f_tty_minor;
465 1 : }
466 :
467 :
468 :
469 :
470 :
471 :
472 : /** \brief Load the `stat` file.
473 : *
474 : * This function loads the `/proc/<pid>/stat` file.
475 : *
476 : * If force is false (the default) and the file was already read, it does
477 : * not get reloaded. If you want the most current data for this process,
478 : * make sure to call the get_state() function with true first, then call
479 : * the other functions which will then get the updated data.
480 : *
481 : * \note
482 : * The functions that use the data read by the function make sure to
483 : * call it. The fields that are not defined by your Linux kernel will
484 : * generally be set to 0 and never change.
485 : *
486 : * \param[in] force Whether to force a reload.
487 : */
488 10 : void process_info::load_stat(bool force)
489 : {
490 : // already read?
491 : //
492 10 : if(f_ppid != -1
493 9 : && !force)
494 : {
495 8 : return;
496 : }
497 :
498 : // still active?
499 : //
500 2 : pid_t const pid(get_pid());
501 2 : if(pid == -1)
502 : {
503 0 : return;
504 : }
505 :
506 : // read stat
507 : //
508 4 : snapdev::file_contents s("/proc/" + std::to_string(pid) + "/stat");
509 2 : s.size_mode(snapdev::file_contents::size_mode_t::SIZE_MODE_READ);
510 2 : if(!s.read_all())
511 : {
512 0 : return;
513 : }
514 :
515 : // first we must extract the name because it can include spaces and
516 : // parenthesis so it completely breaks the rest of the parser otherwise
517 : //
518 4 : std::string line(s.contents());
519 2 : std::string::size_type const first_paren(line.find('('));
520 2 : std::string::size_type const last_paren(line.rfind(')'));
521 :
522 : // name not found!?
523 : //
524 2 : if(first_paren < 2
525 2 : || first_paren == std::string::npos
526 2 : || last_paren > 100)
527 : {
528 0 : return;
529 : }
530 :
531 : // pid mismatch?!
532 : //
533 2 : if(line.substr(0, first_paren - 1) != std::to_string(pid))
534 : {
535 0 : return;
536 : }
537 :
538 : // retrieve name
539 : //
540 2 : f_name = line.substr(first_paren + 1, last_paren - first_paren - 1);
541 :
542 : // retrieve the remaining fields (many)
543 : //
544 : // TODO: I don't think we need to (1) tokenize and then (2) convert
545 : // to integers, instead we want to consider a function which
546 : // converts directly to integers and saves the values to the
547 : // vector
548 : //
549 4 : std::vector<std::string> fields;
550 4 : std::string remaining(line.substr(last_paren + 2));
551 2 : snapdev::tokenize_string(fields, remaining, " ");
552 :
553 2 : if(fields.size() >= 1)
554 : {
555 2 : f_state = static_cast<process_state_t>(fields[0][0]);
556 : }
557 : else
558 : {
559 0 : f_state = process_state_t::PROCESS_STATE_UNKNOWN;
560 : }
561 :
562 2 : if(fields.size() <= 1)
563 : {
564 0 : return;
565 : }
566 :
567 : // convert the fields to numbers
568 : // except for the status (first in `fields`), all are positive numbers
569 : //
570 4 : std::vector<std::int64_t> values;
571 2 : values.reserve(fields.size() - 1);
572 4 : std::transform(
573 4 : fields.cbegin() + 1
574 : , fields.cend()
575 : , std::back_inserter(values)
576 102 : , [](auto const & v) { return std::stoull(v); });
577 :
578 : // since the vector starts with the PPID, I include an offset
579 : // so that way the index looks like the one found in the docs
580 : //
581 2 : constexpr int const field_offset(4);
582 :
583 : // the number of values can vary so we use a function to make sure
584 : // we don't go over the maximum number of values available in our
585 : // vector
586 : //
587 88 : auto get_value = [values](int idx) {
588 84 : std::size_t const offset(idx - field_offset);
589 84 : return offset >= values.size()
590 168 : ? 0
591 168 : : values[offset];
592 4 : };
593 :
594 : // TBD: for the below values we could also consider using an array and
595 : // use indices defined in an enum and then have a form of mapping
596 : // which would ease updates (although I don't think the kernel
597 : // makes changes to those values much of the time)
598 :
599 2 : f_ppid = get_value(4);
600 2 : f_pgid = get_value(5);
601 2 : f_session = get_value(6);
602 :
603 2 : std::uint32_t const tty(get_value(7));
604 2 : f_tty_major = (tty >> 8) & 0xffff;
605 4 : f_tty_minor = ((tty >> 16) & 0xff00)
606 2 : | ((tty >> 0) & 0x00ff);
607 :
608 2 : f_fp_group = get_value(8);
609 2 : f_kernel_flags = get_value(9); // see PF_* in /usr/src/linux-headers-<version>/include/linux/sched.h
610 2 : f_minor_faults = get_value(10);
611 2 : f_children_minor_faults = get_value(11);
612 2 : f_major_faults = get_value(12);
613 2 : f_children_major_faults = get_value(13);
614 2 : f_user_time = get_value(14);
615 2 : f_system_time = get_value(15);
616 2 : f_children_user_time = get_value(16);
617 2 : f_children_system_time = get_value(17);
618 2 : f_priority = get_value(18);
619 2 : f_nice = get_value(19);
620 2 : f_num_threads = get_value(20); // earlier versions of Linux used this field for something else
621 : // skip 21
622 2 : f_start_time = get_value(22);
623 2 : f_virtual_size = get_value(23);
624 2 : f_rss = get_value(24);
625 2 : f_rss_limit = get_value(25);
626 2 : f_start_code = get_value(26);
627 2 : f_end_code = get_value(27);
628 2 : f_start_stack = get_value(28);
629 2 : f_kernel_esp = get_value(29);
630 2 : f_kernel_eip = get_value(30);
631 : // skip 31
632 : // skip 32
633 : // skip 33
634 : // skip 34
635 2 : f_wchan = get_value(35);
636 : // skip 36
637 : // skip 37
638 2 : f_exit_signal = get_value(38);
639 2 : f_processor = get_value(39);
640 2 : f_rt_priority = get_value(40);
641 2 : f_schedule_policy = get_value(41); // see SCHED_* in /usr/src/linux-headers-<version>/include/linux/sched.h
642 2 : f_delayacct_blkio_ticks = get_value(42);
643 2 : f_guest_time = get_value(43);
644 2 : f_children_guest_time = get_value(44);
645 2 : f_start_data = get_value(45);
646 2 : f_end_data = get_value(46);
647 2 : f_start_break = get_value(47);
648 2 : f_arg_start = get_value(48);
649 2 : f_arg_end = get_value(49);
650 2 : f_env_start = get_value(50);
651 2 : f_env_end = get_value(51);
652 2 : f_exit_code = get_value(52);
653 : }
654 :
655 :
656 : /** \brief Load the command line.
657 : *
658 : * This function loads the command line and arguments once. It will be
659 : * cached when further calls happen.
660 : *
661 : * It will fill the f_args vector of strings. 0 will be the full path and
662 : * command name. The other strings are the arguments. The array is not
663 : * ended with a nullptr. Use the vector::size() to know whether you reached
664 : * the end or not.
665 : */
666 12 : void process_info::load_cmdline()
667 : {
668 : // already loaded?
669 : //
670 12 : if(!f_args.empty())
671 : {
672 11 : return;
673 : }
674 :
675 : // still active?
676 : //
677 1 : pid_t const pid(get_pid());
678 1 : if(pid == -1)
679 : {
680 0 : return;
681 : }
682 :
683 : // read cmdline
684 : //
685 2 : snapdev::file_contents cl("/proc/" + std::to_string(pid) + "/cmdline");
686 1 : cl.size_mode(snapdev::file_contents::size_mode_t::SIZE_MODE_READ);
687 1 : if(!cl.read_all())
688 : {
689 0 : return;
690 : }
691 :
692 2 : std::string const cmdline(cl.contents());
693 :
694 : // the following gives us the ability to handle the last string even if
695 : // not terminated with a null character
696 : //
697 1 : char const * l(cmdline.c_str());
698 1 : char const * e(l + cmdline.length());
699 : for(;;)
700 : {
701 10 : char const * s(l);
702 426 : while(l < e && *l != '\0')
703 : {
704 208 : ++l;
705 : }
706 10 : f_args.emplace_back(s, static_cast<std::string::size_type>(l - s));
707 10 : ++l;
708 10 : if(l >= e)
709 : {
710 1 : break;
711 : }
712 9 : }
713 : }
714 :
715 :
716 :
717 :
718 :
719 :
720 :
721 6 : } // namespace cppprocess
722 : // vim: ts=4 sw=4 et
|