0

I am currently doing the MIT opencourseware OS course (https://ocw.mit.edu/courses/6-828-operating-system-engineering-fall-2012/) and I am struggling with the first class assignment which is implementing your own small shell.

My code is the following:

    #include <stdlib.h>
    #include <unistd.h>
    #include <stdio.h>
    #include <fcntl.h>
    #include <string.h>
    #include <assert.h>
    #include <sys/types.h>
    #include <sys/stat.h>
    #include <sys/wait.h>
    #include <stdint.h>
    // Simplifed xv6 shell.
    
    #define MAXARGS 10
    
    // All commands have at least a type. Have looked at the type, the code
    // typically casts the *cmd to some specific cmd type.
    struct cmd {
      int type;          //  ' ' (exec), | (pipe), '<' or '>' for redirection
    };
    
    struct execcmd {
      int type;              // ' '
      char *argv[MAXARGS];   // arguments to the command to be exec-ed
    };
    
    struct redircmd {
      int type;          // < or > 
      struct cmd *cmd;   // the command to be run (e.g., an execcmd)
      char *file;        // the input/output file
      int mode;          // the mode to open the file with
      int fd;            // the file descriptor number to use for the file
    };
    
    struct pipecmd {
      int type;          // |
      struct cmd *left;  // left side of pipe
      struct cmd *right; // right side of pipe
    };
    
    int fork1(void);  // Fork but exits on failure.
    struct cmd *parsecmd(char*);
    
    // Execute cmd.  Never returns.
    void
    runcmd(struct cmd *cmd)
    {
      int p[2], r;
      struct execcmd *ecmd;
      struct pipecmd *pcmd;
      struct redircmd *rcmd;
    
      if(cmd == 0)
        exit(0);
      
      switch(cmd->type){
      default:
        fprintf(stderr, "unknown runcmd\n");
        exit(-1);
    
      case ' ':
        ecmd = (struct execcmd*)cmd;
        if(ecmd->argv[0] == 0)
          exit(0);
    
          if (strcmp(ecmd->argv[0], "ls") == 0) {
          execvp("ls", ecmd->argv);
          fprintf(stderr, "execvp ls failed\n");
          }
          if (strcmp(ecmd->argv[0], "echo") == 0) {
          perror("This is from echo:\n");
          execvp("echo", ecmd->argv);
          fprintf(stderr, "execvp failed\n");
          }
          if (strcmp(ecmd->argv[0], "grep") == 0) {
          //perror("This is from grep:\n");
          execvp("grep", ecmd->argv);
          fprintf(stderr, "execvp failed\n");
          }
          if (strcmp(ecmd->argv[0], "cat") == 0) {
          //perror("This is from grep:\n");
          execvp("cat", ecmd->argv);
          fprintf(stderr, "execvp failed\n");
          }
        break;
    
      case '>':
      case '<':
        rcmd = (struct redircmd*)cmd;
        fprintf(stderr, "redir not implemented\n");
        // Your code here ...
        runcmd(rcmd->cmd);
        break;
    
   case '|':
         pcmd = (struct pipecmd*)cmd;
            struct execcmd *left = (struct execcmd*)pcmd->left;
           struct execcmd *right = (struct execcmd*)pcmd->right;

            int i;
            int pd[2];
            pipe(pd);
            int pid1 = fork1();
            int pid2 = fork1();
            if(!pid1) {
                //  close(STDOUT_FILENO);
                //  close(pd[0]);
               execvp(left->argv[0], left->argv);
                dup2(pd[1], 1);
                exit(0);
            }

            if(!pid2) {
                //  close(STDIN_FILENO);
               //  close(pd[1]);
                dup2(pd[0], 0);
                execvp(right->argv[0], right->argv);
                exit(0);
            }
             if(pid1 > 0) {
                int status2;
                int status;
                if(waitpid(pid1, &status, 0) == -1){
                    perror("waitpid1");
                }
                if(waitpid(pid2, &status2, 0) == -1) {
                    perror("waitpid2");
                };
                //  close(pd[0]);
                //  close(pd[1]);
            }


break; 
       }  
    }
    
    int
    getcmd(char *buf, int nbuf)
    {
      
      if (isatty(fileno(stdin)))
        fprintf(stdout, "$ ");
      memset(buf, 0, nbuf);
      fgets(buf, nbuf, stdin);
      if(buf[0] == 0) // EOF
        return -1;
      return 0;
    }
    
    int
    main(void)
    {
      static char buf[100];
      int fd, r;
    
      // Read and run input commands.
      while(getcmd(buf, sizeof(buf)) >= 0){
        if(buf[0] == 'c' && buf[1] == 'd' && buf[2] == ' '){
          // Clumsy but will have to do for now.
          // Chdir has no effect on the parent if run in the child.
          buf[strlen(buf)-1] = 0;  // chop \n
          if(chdir(buf+3) < 0)
            fprintf(stderr, "cannot cd %s\n", buf+3);
          continue;
        }
        if(fork1() == 0)
          runcmd(parsecmd(buf));
        wait(&r);
      }
      exit(0);
    }
    
    int
    fork1(void)
    {
      int pid;
      
      pid = fork();
      if(pid == -1)
        perror("fork");
      return pid;
    }
    
    struct cmd*
    execcmd(void)
    {
      struct execcmd *cmd;
    
      cmd = malloc(sizeof(*cmd));
      memset(cmd, 0, sizeof(*cmd));
      cmd->type = ' ';
      return (struct cmd*)cmd;
    }
    
    struct cmd*
    redircmd(struct cmd *subcmd, char *file, int type)
    {
      struct redircmd *cmd;
    
      cmd = malloc(sizeof(*cmd));
      memset(cmd, 0, sizeof(*cmd));
      cmd->type = type;
      cmd->cmd = subcmd;
      cmd->file = file;
      cmd->mode = (type == '<') ?  O_RDONLY : O_WRONLY|O_CREAT|O_TRUNC;
      cmd->fd = (type == '<') ? 0 : 1;
      return (struct cmd*)cmd;
    }
    
    struct cmd*
    pipecmd(struct cmd *left, struct cmd *right)
    {
      struct pipecmd *cmd;
    
      cmd = malloc(sizeof(*cmd));
      memset(cmd, 0, sizeof(*cmd));
      cmd->type = '|';
      cmd->left = left;
      cmd->right = right;
      return (struct cmd*)cmd;
    }
    
    // Parsing
    
    char whitespace[] = " \t\r\n\v";
    char symbols[] = "<|>";
    
    int
    gettoken(char **ps, char *es, char **q, char **eq)
    {
      char *s;
      int ret;
      
      s = *ps;
      while(s < es && strchr(whitespace, *s))
        s++;
      if(q)
        *q = s;
      ret = *s;
      switch(*s){
      case 0:
        break;
      case '|':
      case '<':
        s++;
        break;
      case '>':
        s++;
        break;
      default:
        ret = 'a';
        while(s < es && !strchr(whitespace, *s) && !strchr(symbols, *s))
          s++;
        break;
      }
      if(eq)
        *eq = s;
      
      while(s < es && strchr(whitespace, *s))
        s++;
      *ps = s;
      return ret;
    }
    
    int
    peek(char **ps, char *es, char *toks)
    {
      char *s;
      
      s = *ps;
      while(s < es && strchr(whitespace, *s))
        s++;
      *ps = s;
      return *s && strchr(toks, *s);
    }
    
    struct cmd *parseline(char**, char*);
    struct cmd *parsepipe(char**, char*);
    struct cmd *parseexec(char**, char*);
    
    // make a copy of the characters in the input buffer, starting from s through es.
    // null-terminate the copy to make it a string.
    char 
    *mkcopy(char *s, char *es)
    {
      int n = es - s;
      char *c = malloc(n+1);
      assert(c);
      strncpy(c, s, n);
      c[n] = 0;
      return c;
    }
    
    struct cmd*
    parsecmd(char *s)
    {
      char *es;
      struct cmd *cmd;
    
      es = s + strlen(s);
      cmd = parseline(&s, es);
      peek(&s, es, "");
      if(s != es){
        fprintf(stderr, "leftovers: %s\n", s);
        exit(-1);
      }
      return cmd;
    }
    
    struct cmd*
    parseline(char **ps, char *es)
    {
      struct cmd *cmd;
      cmd = parsepipe(ps, es);
      return cmd;
    }
    
    struct cmd*
    parsepipe(char **ps, char *es)
    {
      struct cmd *cmd;
    
      cmd = parseexec(ps, es);
      if(peek(ps, es, "|")){
        gettoken(ps, es, 0, 0);
        cmd = pipecmd(cmd, parsepipe(ps, es));
      }
      return cmd;
    }
    
    struct cmd*
    parseredirs(struct cmd *cmd, char **ps, char *es)
    {
      int tok;
      char *q, *eq;
    
      while(peek(ps, es, "<>")){
        tok = gettoken(ps, es, 0, 0);
        if(gettoken(ps, es, &q, &eq) != 'a') {
          fprintf(stderr, "missing file for redirection\n");
          exit(-1);
        }
        switch(tok){
        case '<':
          cmd = redircmd(cmd, mkcopy(q, eq), '<');
          break;
        case '>':
          cmd = redircmd(cmd, mkcopy(q, eq), '>');
          break;
        }
      }
      return cmd;
    }
    
    struct cmd*
    parseexec(char **ps, char *es)
    {
      char *q, *eq;
      int tok, argc;
      struct execcmd *cmd;
      struct cmd *ret;
      
      ret = execcmd();
      cmd = (struct execcmd*)ret;
    
      argc = 0;
      ret = parseredirs(ret, ps, es);
      while(!peek(ps, es, "|")){
        if((tok=gettoken(ps, es, &q, &eq)) == 0)
          break;
        if(tok != 'a') {
          fprintf(stderr, "syntax error\n");
          exit(-1);
        }
        cmd->argv[argc] = mkcopy(q, eq);
        argc++;
        if(argc >= MAXARGS) {
          fprintf(stderr, "too many args\n");
          exit(-1);
        }
        ret = parseredirs(ret, ps, es);
      }
      cmd->argv[argc] = 0;
      return ret;
    }

The part I am struggling with is the pipe implementation:

 case '|':
         pcmd = (struct pipecmd*)cmd;
            struct execcmd *left = (struct execcmd*)pcmd->left;
           struct execcmd *right = (struct execcmd*)pcmd->right;

            int i;
            int pd[2];
            pipe(pd);
            int pid1 = fork1();
            int pid2 = fork1();
            if(!pid1) {
                //  close(STDOUT_FILENO);
                //  close(pd[0]);
               execvp(left->argv[0], left->argv);
                dup2(pd[1], 1);
                exit(0);
            }

            if(!pid2) {
                //  close(STDIN_FILENO);
               //  close(pd[1]);
                dup2(pd[0], 0);
                execvp(right->argv[0], right->argv);
                exit(0);
            }
             if(pid1 > 0) {
                int status2;
                int status;
                if(waitpid(pid1, &status, 0) == -1){
                    perror("waitpid1");
                }
                if(waitpid(pid2, &status2, 0) == -1) {
                    perror("waitpid2");
                };
                //  close(pd[0]);
                //  close(pd[1]);
            }


break; 

I am trying to execute echo "hahaha" | grep "h" without getting any output. Both echo and grep work individually.

I checked that left->argv[0] and right->argv[0] work as well.

Does anybody have an idea what is going on?

PS: I am only implementing so that you can execute one pipe. I am aware of that I would need a loop if i wanted to make it possible to execute more than 2 piped commands

8
  • To begin with you need one fork per command in the pipeline. Right now you parent shell process will be taken over by the "right" command. Commented Feb 23, 2023 at 0:24
  • For simplification, cant I just execute the child command in the pid process and let the parent command be executed in its parent process? Commented Feb 23, 2023 at 0:26
  • Yes you can, but then the program you exec in the parent process will exit and your shell dies with it. Commented Feb 23, 2023 at 0:26
  • Ok, let me make the adjustements and then update the post. Commented Feb 23, 2023 at 0:27
  • Ok, I updated it, allthough I am still having the same issue. Commented Feb 23, 2023 at 0:36

2 Answers 2

2
+50

A little rewrite of your code is below. Main issues were:

  • dup2() is used after execvp, should be done before.
  • fork1() is used to early for "pid2", results in to many forks.
  • parent should always close pd[]
  • childs shoud close the unused pd[] part.
  • command arguments given to execvp are as-is [per your test].

To test the your and the below code, you should not use surrounding quotes for the grep command argument. (That is a seperate parsing issue/challenge)

To test:

$ echo hahahaha | grep a

The rewrite:

   case '|':
         pcmd = (struct pipecmd*)cmd;
         struct execcmd *left = (struct execcmd*)pcmd->left;
         struct execcmd *right = (struct execcmd*)pcmd->right;

          int i;
          int pd[2];
          pipe(pd);
          int pid1 = fork1();
          if(!pid1) {
              //  close(STDOUT_FILENO);
              close(pd[0]);
              dup2(pd[1], 1);
             execvp(left->argv[0], left->argv);
              exit(0);
          }

          int pid2 = fork1();
          if(!pid2) {
              //  close(STDIN_FILENO);
              close(pd[1]);
              dup2(pd[0], 0);
              execvp(right->argv[0], right->argv);
              exit(0);
          }
          close(pd[0]);
          close(pd[1]);

          int status2;
          int status;
          if(waitpid(pid1, &status, 0) == -1){
               perror("waitpid1");
          }
          if(waitpid(pid2, &status2, 0) == -1) {
               perror("waitpid2");
          };
break;

Good luck

Sign up to request clarification or add additional context in comments.

3 Comments

Got it. Makes sense that I have to close it because otherwise dup2 will suspend the process and it wont return. However, with the above solution I am getting the output hahaha but without the h's marked and I am wondering if that "hahaha" is just from executing the echo command but not actually from the grep command. Is that something I have to worry about?
It works. I know that the above is not the case because when i do echo aaaaa | grep b, I am getting no output which is expected behavior. For some reason, the coloring of the h's is not happening though.
Nvm, figured out the coloring as well. I gotta pass color=auto. Thank you.
1

I'm just guessing here, but I think your problem is a deadlock.

Both in your original and updated code you have the call

waitpid(pid, &status, 0);

This will make the current process stop and wait for the process identified by pid to finish. But if that process doesn't finish, then waitpid will never return!

This can happen if, for example, the first part of the pipe fills up the write-buffers of the pipe. That will cause all writing to the pipe to block, and the process will not be able to continue. And since there is no one reading from the other end of the pipe, that is what will happen.

You need to wait for both the child-process in the parent process. The two child-processes should essentially do the same thing:

  1. Set up standard output/input (the dup2 call)
  2. Run the program (the exec call)

Nothing more needs to be done in them. Especially nothing that will delay or block the process indefinitely.


The parent process needs to do this:

  1. Start first child process
  2. Start second child process
  3. Wait for both processes to end

And remember that the order is quite important. If you wait for the first child process between step 1 and 2 then you again have a deadlock.

3 Comments

When I waitpid both child processes in the parent process, it looks like that it is never returning, But why? After all I am running ''exit(0);'' inside the if statements. I am also reading the reading buffer from the pipe with dup2(pd[0], 0] in the second child process. Is it maybe because the second child process is reading faster than the first process writes?
@a6i09per5f dub2 duplicates a descriptor, it does not read from it. And if exec works then your exit calls won't be executed (if exec works, it won't return). There will however be an exit from the program you run in that case. And you need to wait for the child-processes (both of them) after both have been created and started and you have called exec.
Then it may be a different problem because I am doing that now (see code). I think I have to learn how to sufficiently debug child processes with gdb in order to solve this.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.