001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.ha;
019    
020    import java.io.IOException;
021    import java.lang.reflect.Field;
022    import java.util.Map;
023    
024    import org.apache.commons.logging.Log;
025    import org.apache.commons.logging.LogFactory;
026    import org.apache.hadoop.conf.Configured;
027    
028    import com.google.common.annotations.VisibleForTesting;
029    
030    /**
031     * Fencing method that runs a shell command. It should be specified
032     * in the fencing configuration like:<br>
033     * <code>
034     *   shell(/path/to/my/script.sh arg1 arg2 ...)
035     * </code><br>
036     * The string between '(' and ')' is passed directly to a bash shell and
037     * may not include any closing parentheses.<p>
038     * 
039     * The shell command will be run with an environment set up to contain
040     * all of the current Hadoop configuration variables, with the '_' character 
041     * replacing any '.' characters in the configuration keys.<p>
042     * 
043     * If the shell command returns an exit code of 0, the fencing is
044     * determined to be successful. If it returns any other exit code, the
045     * fencing was not successful and the next fencing method in the list
046     * will be attempted.<p>
047     * 
048     * <em>Note:</em> this fencing method does not implement any timeout.
049     * If timeouts are necessary, they should be implemented in the shell
050     * script itself (eg by forking a subshell to kill its parent in
051     * some number of seconds).
052     */
053    public class ShellCommandFencer
054      extends Configured implements FenceMethod {
055    
056      /** Length at which to abbreviate command in long messages */
057      private static final int ABBREV_LENGTH = 20;
058    
059      /** Prefix for target parameters added to the environment */
060      private static final String TARGET_PREFIX = "target_";
061      
062      @VisibleForTesting
063      static Log LOG = LogFactory.getLog(
064          ShellCommandFencer.class);
065      
066      @Override
067      public void checkArgs(String args) throws BadFencingConfigurationException {
068        if (args == null || args.isEmpty()) {
069          throw new BadFencingConfigurationException(
070              "No argument passed to 'shell' fencing method");
071        }
072        // Nothing else we can really check without actually running the command
073      }
074    
075      @Override
076      public boolean tryFence(HAServiceTarget target, String cmd) {
077        ProcessBuilder builder = new ProcessBuilder(
078            "bash", "-e", "-c", cmd);
079        setConfAsEnvVars(builder.environment());
080        addTargetInfoAsEnvVars(target, builder.environment());
081    
082        Process p;
083        try {
084          p = builder.start();
085          p.getOutputStream().close();
086        } catch (IOException e) {
087          LOG.warn("Unable to execute " + cmd, e);
088          return false;
089        }
090        
091        String pid = tryGetPid(p);
092        LOG.info("Launched fencing command '" + cmd + "' with "
093            + ((pid != null) ? ("pid " + pid) : "unknown pid"));
094        
095        String logPrefix = abbreviate(cmd, ABBREV_LENGTH);
096        if (pid != null) {
097          logPrefix = "[PID " + pid + "] " + logPrefix;
098        }
099        
100        // Pump logs to stderr
101        StreamPumper errPumper = new StreamPumper(
102            LOG, logPrefix, p.getErrorStream(),
103            StreamPumper.StreamType.STDERR);
104        errPumper.start();
105        
106        StreamPumper outPumper = new StreamPumper(
107            LOG, logPrefix, p.getInputStream(),
108            StreamPumper.StreamType.STDOUT);
109        outPumper.start();
110        
111        int rc;
112        try {
113          rc = p.waitFor();
114          errPumper.join();
115          outPumper.join();
116        } catch (InterruptedException ie) {
117          LOG.warn("Interrupted while waiting for fencing command: " + cmd);
118          return false;
119        }
120        
121        return rc == 0;
122      }
123    
124      /**
125       * Abbreviate a string by putting '...' in the middle of it,
126       * in an attempt to keep logs from getting too messy.
127       * @param cmd the string to abbreviate
128       * @param len maximum length to abbreviate to
129       * @return abbreviated string
130       */
131      static String abbreviate(String cmd, int len) {
132        if (cmd.length() > len && len >= 5) {
133          int firstHalf = (len - 3) / 2;
134          int rem = len - firstHalf - 3;
135          
136          return cmd.substring(0, firstHalf) + 
137            "..." + cmd.substring(cmd.length() - rem);
138        } else {
139          return cmd;
140        }
141      }
142      
143      /**
144       * Attempt to use evil reflection tricks to determine the
145       * pid of a launched process. This is helpful to ops
146       * if debugging a fencing process that might have gone
147       * wrong. If running on a system or JVM where this doesn't
148       * work, it will simply return null.
149       */
150      private static String tryGetPid(Process p) {
151        try {
152          Class<? extends Process> clazz = p.getClass();
153          if (clazz.getName().equals("java.lang.UNIXProcess")) {
154            Field f = clazz.getDeclaredField("pid");
155            f.setAccessible(true);
156            return String.valueOf(f.getInt(p));
157          } else {
158            LOG.trace("Unable to determine pid for " + p
159                + " since it is not a UNIXProcess");
160            return null;
161          }
162        } catch (Throwable t) {
163          LOG.trace("Unable to determine pid for " + p, t);
164          return null;
165        }
166      }
167    
168      /**
169       * Set the environment of the subprocess to be the Configuration,
170       * with '.'s replaced by '_'s.
171       */
172      private void setConfAsEnvVars(Map<String, String> env) {
173        for (Map.Entry<String, String> pair : getConf()) {
174          env.put(pair.getKey().replace('.', '_'), pair.getValue());
175        }
176      }
177    
178      /**
179       * Add information about the target to the the environment of the
180       * subprocess.
181       * 
182       * @param target
183       * @param environment
184       */
185      private void addTargetInfoAsEnvVars(HAServiceTarget target,
186          Map<String, String> environment) {
187        for (Map.Entry<String, String> e :
188             target.getFencingParameters().entrySet()) {
189          String key = TARGET_PREFIX + e.getKey();
190          key = key.replace('.', '_');
191          environment.put(key, e.getValue());
192        }
193      }
194    }