hadoop-hdfs-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Mitesh Singh Jat (Commented) (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (HDFS-2685) hadoop fs -ls globbing gives inconsistent exit code
Date Fri, 24 Feb 2012 06:28:48 GMT

    [ https://issues.apache.org/jira/browse/HDFS-2685?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13215424#comment-13215424
] 

Mitesh Singh Jat commented on HDFS-2685:
----------------------------------------

As a workaround, I have created a C program that resolves the globbing and returns an appropriate
exit code.

{code:title=hadoop_fs_ls_globbing.c}
//-------------------------------------------------------------
/*
 *       Filename:  hadoop_fs_ls_globbing.c
 *    Description:  To support globbing in the `hadoop fs -ls` command
 *        Created:  01/11/2012 09:39:55 AM (IST)
 *         Author:  Mitesh Singh Jat (), mitesh_singh_jat[at]yahoo[dot]co[dot]in
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#define CMD_LEN 4096    // command line length
#define PATH_LEN 256    // splits in path based on delimiters
#define MAX_PATHS 256   // Maximum resolved paths in globbed path

/*
 * Write the command-line synopsis to stderr and terminate the process
 * with EXIT_FAILURE.  Control never returns to the caller.
 */
int print_usage(char *cmd_name) {
    FILE *out = stderr;

    fprintf(out, "Usage: %s [-c <hadoop_commands>] path\n", cmd_name);
    exit(EXIT_FAILURE);
}

/*
 * Append src to the NUL-terminated string held in dst without ever writing
 * past dst_size bytes.  Returns 0 on success, -1 if the result would not fit
 * (dst is left unchanged in that case).
 */
static int append_bounded(char *dst, size_t dst_size, const char *src)
{
    size_t used = strlen(dst);
    size_t need = strlen(src);

    if (need >= dst_size - used)
        return -1;
    memcpy(dst + used, src, need + 1);   /* +1 copies the terminator */
    return 0;
}

/*
 * Expand brace groups ("{a,b}") in a path, run one command per expanded
 * path through system(), and report a non-zero exit status if any of the
 * commands failed.
 *
 * Usage: prog [-c <hadoop_commands>] path
 *   Without -c the command prefix is "hadoop fs -ls ".
 *
 * The path is split on '{' and '}' into segments; each segment is split on
 * ',' into alternatives.  The commands run are the cartesian product of the
 * alternatives, leftmost group varying slowest.
 *
 * Returns 0 if every command exited with status 0; otherwise the exit
 * status of the last failing command (128 + signal number if it was killed
 * by a signal, EXIT_FAILURE if system() itself failed).
 *
 * NOTE(review): the expanded path is handed to the shell unquoted via
 * system(); shell metacharacters in the path argument will be interpreted.
 */
int main(int argc, char *argv[])
{
    /*
     * static: these tables total well over a megabyte, too much to put on
     * the stack safely.  Static storage is zero-initialised, so every
     * cmds[k] starts as "" and every a[i][j] as a null pointer.
     */
    static char cmds[MAX_PATHS][CMD_LEN];   /* one command line per expanded path */
    static char *a[MAX_PATHS][PATH_LEN];    /* a[segment][alternative] */
    int nsts[PATH_LEN];                     /* number of alternatives per segment */
    char cmd[CMD_LEN] = "hadoop fs -ls ";
    const char delim[] = "{}";
    const char subdelim[] = ",";
    const char *prog = argv[0];
    char *path;
    char *str1, *str2, *token, *subtoken;
    char *saveptr1, *saveptr2;
    int i, j, k;
    int total_paths = 1;
    int nt = 0, nst = 0;
    int xpaths;
    int retval = 0;
    int exit_code = 0;

    /* Accept exactly "prog path" or "prog -c cmd path" (extra args ignored). */
    if (argc < 2 || argc == 3)
        print_usage(argv[0]);
    path = argv[1];
    if (argc > 2) {
        int n;

        if (strncmp(argv[1], "-c", 2))
            print_usage(argv[0]);
        /* snprintf always terminates and lets us detect truncation. */
        n = snprintf(cmd, sizeof cmd, "%s ", argv[2]);
        if (n < 0 || (size_t) n >= sizeof cmd) {
            fprintf(stderr, "%s: command longer than %d characters\n",
                    prog, CMD_LEN - 2);
            exit(EXIT_FAILURE);
        }
        path = argv[3];
    }

    /* Segment 0 is the command prefix itself, with a single alternative. */
    a[0][0] = cmd;
    nsts[0] = 1;

    /* Split the path into segments and each segment into alternatives. */
    for (str1 = path, nt = 0; ; str1 = NULL, ++nt) {
        token = strtok_r(str1, delim, &saveptr1);
        if (token == NULL)
            break;
        if (nt + 1 >= MAX_PATHS || nt + 1 >= PATH_LEN) {
            fprintf(stderr, "%s: too many path segments (limit %d)\n",
                    prog, PATH_LEN - 1);
            exit(EXIT_FAILURE);
        }

        for (str2 = token, nst = 0; ; str2 = NULL, ++nst) {
            subtoken = strtok_r(str2, subdelim, &saveptr2);
            if (subtoken == NULL)
                break;
            if (nst >= PATH_LEN) {
                fprintf(stderr, "%s: too many alternatives in one group (limit %d)\n",
                        prog, PATH_LEN);
                exit(EXIT_FAILURE);
            }
            a[nt + 1][nst] = subtoken;
        }
        /* A segment made only of commas yields no alternatives; without
         * this check total_paths would drop to 0 and the expansion below
         * would divide by zero. */
        if (nst == 0) {
            fprintf(stderr, "%s: empty alternative group in path\n", prog);
            exit(EXIT_FAILURE);
        }
        total_paths *= nst;
        if (total_paths > MAX_PATHS) {
            fprintf(stderr, "%s: Total generated paths %d > %d\n",
                    prog, total_paths, MAX_PATHS);
            exit(EXIT_FAILURE);
        }
        nsts[nt + 1] = nst;
    }

    /*
     * Build the cartesian product.  xpaths is the number of consecutive
     * commands that share the same alternative of the current group; it
     * shrinks by the group size each time a multi-alternative group is
     * consumed.
     */
    xpaths = total_paths;
    for (i = 0; i < nt + 1; ++i) {
        nst = nsts[i];
        if (nst > 1)
            xpaths = xpaths / nst;
        for (k = 0; k < total_paths; ++k) {
            j = (nst > 1) ? (k / xpaths) % nst : 0;
            if (append_bounded(cmds[k], sizeof cmds[k], a[i][j]) != 0) {
                fprintf(stderr, "%s: generated command longer than %d characters\n",
                        prog, CMD_LEN - 1);
                exit(EXIT_FAILURE);
            }
        }
    }

    for (i = 0; i < total_paths; ++i) {
        printf("Running: %s\n", cmds[i]);
        /* Flush so our line precedes the child's output when redirected. */
        fflush(stdout);
        retval = system(cmds[i]);
        if (retval == -1) {
            /* The child process could not be created or waited for. */
            perror("system");
            exit_code = EXIT_FAILURE;
        } else if (WIFEXITED(retval)) {
            /* The exit code lives in the wait status, not in its low byte. */
            if (WEXITSTATUS(retval) != 0)
                exit_code = WEXITSTATUS(retval);
        } else if (WIFSIGNALED(retval)) {
            /* Follow the shell convention for signal-terminated children. */
            exit_code = 128 + WTERMSIG(retval);
        }
        /* To stop at the first failure instead of running the remaining
         * commands, exit here when exit_code is non-zero. */
    }

    return (exit_code);
} /* main */
//-------------------------------------------------------------
{code}

Compilation on Linux/Unix
{noformat}
$ gcc -Wall -o hadoop_fs_ls_globbing hadoop_fs_ls_globbing.c
{noformat}


The above C program also supports a custom hadoop command (via the -c option flag), for example:

"hadoop fs -ls"
"hadoop fs -test -e"
{noformat}
$ ./hadoop_fs_ls_globbing -c "hadoop fs -test -e" "input/20110{4,1}/{A,B}"; echo $?
Running: hadoop fs -test -e input/201104/A
Running: hadoop fs -test -e input/201104/B
Running: hadoop fs -test -e input/201101/A
Running: hadoop fs -test -e input/201101/B
255    <*****
$ ./hadoop_fs_ls_globbing -c "hadoop fs -ls" "input/20110{4,1}/{A,B}"; echo $?
Running: hadoop fs -ls input/201104/A
ls: Cannot access input/201104/A: No such file or directory.
Running: hadoop fs -ls input/201104/B
ls: Cannot access input/201104/B: No such file or directory.
Running: hadoop fs -ls input/201101/A
Found 2 items
drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201101/A/1
drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201101/A/2
Running: hadoop fs -ls input/201101/B
Found 2 items
drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201101/B/1
drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201101/B/2
255   <*****
{noformat}
                
> hadoop fs -ls globbing gives inconsistent exit code
> ---------------------------------------------------
>
>                 Key: HDFS-2685
>                 URL: https://issues.apache.org/jira/browse/HDFS-2685
>             Project: Hadoop HDFS
>          Issue Type: Bug
>    Affects Versions: 0.20.2, 0.20.204.0, 0.20.205.0
>            Reporter: Mitesh Singh Jat
>
> The _hadoop fs -ls_ command, given a globbed input path, returns the exit code
of the last resolved absolute path, whereas the _ls_ command always gives the same exit code regardless
of the position of the non-existent path in the globbing.
> {code}$ hadoop fs -mkdir input/20110{1,2,3}/{A,B,C,D}/{1,2} {code}
> Since directory 'input/201104/' is not present, the following command gives 255 as exit
code.
> {code}$ hadoop fs -ls input/20110{1,2,3,4}/ ; echo $? {code}
> {noformat}
> Found 4 items
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201101/A
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201101/B
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201101/C
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201101/D
> Found 4 items
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201102/A
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201102/B
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201102/C
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201102/D
> Found 4 items
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201103/A
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201103/B
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201103/C
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201103/D
> ls: Cannot access input/201104/: No such file or directory.
> 255
> {noformat}
> Here the directory 'input/201104/' is not present and is given as the second-to-last alternative in the globbing.
> The following command gives 0 as exit code, because directory 'input/201103/' is present.
> {code}$ hadoop fs -ls input/20110{1,2,4,3}/ ; echo $? {code}
> {noformat}
> Found 4 items
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201101/A
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201101/B
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201101/C
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201101/D
> Found 4 items
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201102/A
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201102/B
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201102/C
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201102/D
> ls: Cannot access input/201104/: No such file or directory.
> Found 4 items
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201103/A
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201103/B
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201103/C
> drwxr-xr-x   - mitesh supergroup          0 2011-12-15 11:51 /user/mitesh/input/201103/D
> 0
> {noformat}
> Whereas, on Linux, ls command gives non-zero(2) as exit code, irrespective of position
of non-existent path in globbing.
> {code}$ mkdir -p input/20110{1,2,3,4}/{A,B,C,D}/{1,2} {code}
> {code}$ ls input/20110{1,2,4,3}/ ; echo $? {code}
> {noformat}
> /bin/ls: input/201104/: No such file or directory
> input/201101/:
> ./  ../  A/  B/  C/  D/
> input/201102/:
> ./  ../  A/  B/  C/  D/
> input/201103/:
> ./  ../  A/  B/  C/  D/
> 2
> {noformat}
> {code}$ ls input/20110{1,2,3,4}/ ; echo $? {code}
> {noformat}
> /bin/ls: input/201104/: No such file or directory
> input/201101/:
> ./  ../  A/  B/  C/  D/
> input/201102/:
> ./  ../  A/  B/  C/  D/
> input/201103/:
> ./  ../  A/  B/  C/  D/
> 2
> {noformat}

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira

        

Mime
View raw message