storm-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From d2r <...@git.apache.org>
Subject [GitHub] storm pull request #1642: DO NOT MERGE: Please review STORM-2018: Supervisor...
Date Fri, 26 Aug 2016 19:50:37 GMT
Github user d2r commented on a diff in the pull request:

    https://github.com/apache/storm/pull/1642#discussion_r76477726
  
    --- Diff: storm-core/src/jvm/org/apache/storm/daemon/supervisor/Container.java ---
    @@ -0,0 +1,437 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.storm.daemon.supervisor;
    +
    +import java.io.BufferedReader;
    +import java.io.File;
    +import java.io.FileWriter;
    +import java.io.IOException;
    +import java.io.InputStreamReader;
    +import java.lang.ProcessBuilder.Redirect;
    +import java.util.ArrayList;
    +import java.util.HashMap;
    +import java.util.HashSet;
    +import java.util.List;
    +import java.util.Map;
    +import java.util.Set;
    +
    +import org.apache.commons.io.FileUtils;
    +import org.apache.storm.Config;
    +import org.apache.storm.container.ResourceIsolationInterface;
    +import org.apache.storm.generated.LSWorkerHeartbeat;
    +import org.apache.storm.generated.LocalAssignment;
    +import org.apache.storm.generated.ProfileRequest;
    +import org.apache.storm.utils.ConfigUtils;
    +import org.apache.storm.utils.LocalState;
    +import org.apache.storm.utils.Utils;
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
    +import org.yaml.snakeyaml.Yaml;
    +
    +/**
    + * Represents a container that a worker will run in.
    + */
    +public abstract class Container implements Killable {
    +    private static final Logger LOG = LoggerFactory.getLogger(BasicContainer.class);
    +    protected final Map<String, Object> _conf;
    +    protected String _workerId;
    +    protected final String _supervisorId;
    +    protected final int _port;
    +    protected final LocalAssignment _assignment;
    +    protected final AdvancedFSOps _ops;
    +    protected final ResourceIsolationInterface _resourceIsolationManager;
    +    
    /**
     * Create a container for a worker being launched on a known port with a known
     * assignment.
     * @param port the port the worker will run on
     * @param assignment the assignment of the worker
     * @param conf the config of the supervisor
     * @param supervisorId the id of the supervisor
     * @param resourceIsolationManager the isolation manager (may be null — callers
     *        elsewhere in this class null-check it)
     */
    protected Container(int port, LocalAssignment assignment, Map<String, Object> conf, 
            String supervisorId, ResourceIsolationInterface resourceIsolationManager) {
        _port = port;
        _assignment = assignment;
        _conf = conf;
        _supervisorId = supervisorId;
        _resourceIsolationManager = resourceIsolationManager;
        _ops = AdvancedFSOps.mk(conf);
    }
    +    
    /**
     * Constructor to use when trying to recover a container from just the worker ID.
     * In this case no port or assignment is available, so the container can be
     * killed and cleaned up but not set up or launched.
     * @param workerId the id of the worker
     * @param conf the config of the supervisor
     * @param supervisorId the id of the supervisor
     * @param resourceIsolationManager the isolation manager.
     */
    protected Container(String workerId, Map<String, Object> conf, 
            String supervisorId, ResourceIsolationInterface resourceIsolationManager) {
        _workerId = workerId;
        _conf = conf;
        _ops = AdvancedFSOps.mk(conf);
        _supervisorId = supervisorId;
        _resourceIsolationManager = resourceIsolationManager;
        // Sentinel values: recovery mode has no port and no assignment.
        _port = -1;
        _assignment = null;
    }
    +    
    +    /**
    +     * Kill a given process
    +     * @param pid the id of the process to kill
    +     * @throws IOException
    +     */
    +    protected void kill(long pid) throws IOException {
    +        Utils.killProcessWithSigTerm(String.valueOf(pid));
    +    }
    +    
    /**
     * Kill a given process forcefully; unlike {@link #kill(long)} the process
     * is not given a chance to shut down cleanly.
     * @param pid the id of the process to kill
     * @throws IOException
     */
    protected void forceKill(long pid) throws IOException {
        Utils.forceKillProcess(String.valueOf(pid));
    }
    +    
    +    @Override
    +    public void kill() throws IOException {
    +        LOG.info("Killing {}:{}", _supervisorId, _workerId);
    +        Set<Long> pids = getAllPids();
    +
    +        for (Long pid : pids) {
    +            kill(pid);
    +        }
    +    }
    +    
    +    @Override
    +    public void forceKill() throws IOException {
    +        LOG.info("Force Killing {}:{}", _supervisorId, _workerId);
    +        Set<Long> pids = getAllPids();
    +        
    +        for (Long pid : pids) {
    +            forceKill(pid);
    +        }
    +    }
    +    
    +    /**
    +     * Read the Heartbeat for the current container.
    +     * @return the Heartbeat
    +     * @throws IOException on any error
    +     */
    +    public LSWorkerHeartbeat readHeartbeat() throws IOException {
    +        LocalState localState = ConfigUtils.workerState(_conf, _workerId);
    +        LSWorkerHeartbeat hb = localState.getWorkerHeartBeat();
    +        LOG.warn("{}: Reading heartbeat {}", _workerId, hb);
    +        return hb;
    +    }
    +
    +    /**
    +     * Is a process alive and running?
    +     * @param pid the PID of the running process
    +     * @param user the user that is expected to own that process
    +     * @return true if it is, else false
    +     * @throws IOException on any error
    +     */
    +    protected boolean isProcessAlive(long pid, String user) throws IOException {
    +        if (Utils.IS_ON_WINDOWS) {
    +            return isWindowsProcessAlive(pid, user);
    +        }
    +        return isPosixProcessAlive(pid, user);
    +    }
    +    
    +    private boolean isWindowsProcessAlive(long pid, String user) throws IOException {
    +        boolean ret = false;
    +        ProcessBuilder pb = new ProcessBuilder("tasklist", "/nh", "/fi", "pid eq"+pid);
    +        pb.redirectError(Redirect.INHERIT);
    +        Process p = pb.start();
    +        try (BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream())))
{
    +            if (in.readLine() != null) {
    +                ret = true;
    +            }
    +        }
    +        return ret;
    +    }
    +    
    +    private boolean isPosixProcessAlive(long pid, String user) throws IOException {
    +        boolean ret = false;
    +        ProcessBuilder pb = new ProcessBuilder("ps", "-o", "user", "-p", String.valueOf(pid));
    +        pb.redirectError(Redirect.INHERIT);
    +        Process p = pb.start();
    +        try (BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream())))
{
    +            String first = in.readLine();
    +            assert("USER".equals(first));
    +            String processUser;
    +            while ((processUser = in.readLine()) != null) {
    +                if (user.equals(processUser)) {
    +                    ret = true;
    +                    break;
    +                } else {
    +                    LOG.info("Found {} running as {}, but expected it to be {}", pid,
processUser, user);
    +                }
    +            }
    +        }
    +        return ret;
    +    }
    +    
    +    @Override
    +    public boolean areAllProcessesDead() throws IOException {
    +        Set<Long> pids = getAllPids();
    +        String user = getWorkerUser();
    +        
    +        boolean allDead = true;
    +        for (Long pid: pids) {
    +            if (!isProcessAlive(pid, user)) {
    +                LOG.warn("{}: PID {} is dead", _workerId, pid);
    +            } else {
    +                allDead = false;
    +                break;
    +            }
    +        }
    +        return allDead;
    +    }
    +
    /**
     * Clean up the container.  The default implementation performs the same
     * cleanup as preparing for a restart; subclasses may override to release
     * additional resources.
     * @throws IOException on any error
     */
    @Override
    public void cleanUp() throws IOException {
        cleanUpForRestart();
    }
    +
    /**
     * Setup the container to run.  By default this creates the needed directories/links in the
     * local file system
     * PREREQUISITE: All needed blobs and topology, jars/configs have been downloaded and
     * placed in the appropriate locations
     * @throws IOException on any error
     */
    protected void setup() throws IOException {
        // A container recovered from just a worker id has _port == -1 and no
        // assignment, so it can only be killed/cleaned up, never set up.
        if (_port <= 0) {
            throw new IllegalStateException("Cannot setup a container recovered with just a worker id");
        }
        final String topologyId = _assignment.get_topology_id();
        // Fail fast if the topology code/jars/config are not already on local disk.
        if (!SupervisorUtils.doRequiredTopoFilesExist(_conf, topologyId)) {
            LOG.info("Missing topology storm code, so can't launch  worker with assignment {} for this supervisor {} on port {} with id {}", _assignment,
                    _supervisorId, _port, _workerId);
            throw new IllegalStateException("Not all needed files are here!!!!");
        }
        // Directories the worker will write its pid files and heartbeats into.
        String pidsPath = ConfigUtils.workerPidsRoot(_conf, _workerId);
        String hbPath = ConfigUtils.workerHeartbeatsRoot(_conf, _workerId);
    
        FileUtils.forceMkdir(new File(pidsPath));
        FileUtils.forceMkdir(new File(ConfigUtils.workerTmpRoot(_conf, _workerId)));
        FileUtils.forceMkdir(new File(hbPath));
    
        Map<String, Object> topologyConf = ConfigUtils.readSupervisorStormConf(_conf, topologyId);
        String user = (String) topologyConf.get(Config.TOPOLOGY_SUBMITTER_USER);
        // Record who may view this worker's logs and which user the worker runs as.
        writeLogMetadata(topologyConf, user, topologyId);
        ConfigUtils.setWorkerUserWSE(_conf, _workerId, user);
        // Symlink the worker dir to its artifacts directory and to the topology blobs.
        createArtifactsLink(topologyId);
    
        createBlobstoreLinks(topologyId);
    }
    +    
    +    /**
    +     * Write out the file used by the log viewer to allow/reject log access
    +     * @param topologyConf the config for the topology
    +     * @param user the user this is going to run as
    +     * @param topologyId the id of the topology
    +     * @throws IOException on any error
    +     */
    +    @SuppressWarnings("unchecked")
    +    protected void writeLogMetadata(Map<String, Object> topologyConf, String user,
String topologyId) throws IOException {
    +        if (_port <= 0) {
    +            throw new IllegalStateException("Cannot setup a container recovered with
just a worker id");
    +        }
    +        Map<String, Object> data = new HashMap<>();
    +        data.put(Config.TOPOLOGY_SUBMITTER_USER, user);
    +        data.put("worker-id", _workerId);
    +
    +        Set<String> logsGroups = new HashSet<>();
    +        //for supervisor-test
    +        if (topologyConf.get(Config.LOGS_GROUPS) != null) {
    +            List<String> groups = (List<String>) topologyConf.get(Config.LOGS_GROUPS);
    +            for (String group : groups){
    +                logsGroups.add(group);
    +            }
    +        }
    +        if (topologyConf.get(Config.TOPOLOGY_GROUPS) != null) {
    +            List<String> topGroups = (List<String>) topologyConf.get(Config.TOPOLOGY_GROUPS);
    +            logsGroups.addAll(topGroups);
    +        }
    +        data.put(Config.LOGS_GROUPS, logsGroups.toArray());
    +
    +        Set<String> logsUsers = new HashSet<>();
    +        if (topologyConf.get(Config.LOGS_USERS) != null) {
    +            List<String> logUsers = (List<String>) topologyConf.get(Config.LOGS_USERS);
    +            for (String logUser : logUsers){
    +                logsUsers.add(logUser);
    +            }
    +        }
    +        if (topologyConf.get(Config.TOPOLOGY_USERS) != null) {
    +            List<String> topUsers = (List<String>) topologyConf.get(Config.TOPOLOGY_USERS);
    +            for (String logUser : topUsers){
    +                logsUsers.add(logUser);
    +            }
    +        }
    +        data.put(Config.LOGS_USERS, logsUsers.toArray());
    +
    +        File file = ConfigUtils.getLogMetaDataFile(_conf, topologyId, _port);
    +        File parent = file.getParentFile();
    +        if (!Utils.checkFileExists(parent)) {
    +            FileUtils.forceMkdir(file.getParentFile());
    +            _ops.setupStormCodeDir(ConfigUtils.readSupervisorStormConf(_conf, topologyId),
file.getParentFile().getCanonicalPath());
    +        }
    +        Yaml yaml = new Yaml();
    +        try (FileWriter writer = new FileWriter(file)) {
    +            yaml.dump(data, writer);
    +        }
    +    }
    +    
    +    /**
    +     * Create symlink from the containers directory/artifacts to the artifacts directory
    +     * @param topologyId the id of the topology this is for
    +     * @throws IOException on any error
    +     */
    +    protected void createArtifactsLink(String topologyId) throws IOException {
    +        if (_port <= 0) {
    +            throw new IllegalStateException("Cannot setup a container recovered with
just a worker id");
    +        }
    +        String workerDir = ConfigUtils.workerRoot(_conf, _workerId);
    +        String topoDir = ConfigUtils.workerArtifactsRoot(_conf, topologyId);
    +        if (Utils.checkFileExists(workerDir)) {
    +            LOG.debug("Creating symlinks for worker-id: {} topology-id: {} to its port
artifacts directory", _workerId, topologyId);
    +            Utils.createSymlink(workerDir, topoDir, "artifacts", String.valueOf(_port));
    +        }
    +    }
    +    
    +    /**
    +     * Create symlinks for each of the blobs from the container's directory to
    +     * corresponding links in the storm dist directory.
    +     * @param topologyId the id of the topology to do this for.
    +     * @throws IOException on any error.
    +     */
    +    protected void createBlobstoreLinks(String topologyId) throws IOException {
    +        String stormRoot = ConfigUtils.supervisorStormDistRoot(_conf, topologyId);
    +        Map<String, Object> stormConf = ConfigUtils.readSupervisorStormConf(_conf,
topologyId);
    +        String workerRoot = ConfigUtils.workerRoot(_conf, _workerId);
    +        
    +        @SuppressWarnings("unchecked")
    +        Map<String, Map<String, Object>> blobstoreMap = (Map<String, Map<String,
Object>>) stormConf.get(Config.TOPOLOGY_BLOBSTORE_MAP);
    +        List<String> blobFileNames = new ArrayList<>();
    +        if (blobstoreMap != null) {
    +            for (Map.Entry<String, Map<String, Object>> entry : blobstoreMap.entrySet())
{
    +                String key = entry.getKey();
    +                Map<String, Object> blobInfo = entry.getValue();
    +                String ret = null;
    +                if (blobInfo != null && blobInfo.containsKey("localname")) {
    +                    ret = (String) blobInfo.get("localname");
    +                } else {
    +                    ret = key;
    +                }
    +                blobFileNames.add(ret);
    +            }
    +        }
    +        List<String> resourceFileNames = new ArrayList<>();
    +        resourceFileNames.add(ConfigUtils.RESOURCES_SUBDIR);
    +        resourceFileNames.addAll(blobFileNames);
    +        LOG.info("Creating symlinks for worker-id: {} storm-id: {} for files({}): {}",
_workerId, topologyId, resourceFileNames.size(), resourceFileNames);
    +        Utils.createSymlink(workerRoot, stormRoot, ConfigUtils.RESOURCES_SUBDIR);
    +        for (String fileName : blobFileNames) {
    +            Utils.createSymlink(workerRoot, stormRoot, fileName, fileName);
    +        }
    +    }
    +    
    +    /**
    +     * @return all of the pids that are a part of this container.
    +     */
    +    protected Set<Long> getAllPids() throws IOException {
    +        Set<Long> ret = new HashSet<>();
    +        for (String listing: Utils.readDirContents(ConfigUtils.workerPidsRoot(_conf,
_workerId))) {
    +            ret.add(Long.valueOf(listing));
    +        }
    +        
    +        if (_resourceIsolationManager != null) {
    +            Set<Long> morePids = _resourceIsolationManager.getRunningPIDs(_workerId);
    +            if (morePids != null) {
    +                ret.addAll(morePids);
    +            }
    +        }
    +        
    +        return ret;
    +    }
    +    
    /** 
     * @return the user that some operations should be done as.
     */
    protected String getWorkerUser() {
        // Presumably the value written via ConfigUtils.setWorkerUserWSE during
        // setup() — confirm against ConfigUtils.
        return ConfigUtils.getWorkerUser(_conf, _workerId);
    }
    +    
    +    /**
    +     * Clean up the container partly preparing for restart.
    +     * By default delete all of the temp directories we are going
    +     * to get a new worker_id anyways.
    +     * @throws IOException on any error
    +     */
    +    public void cleanUpForRestart() throws IOException {
    +        // and another API to cleanup with everything is dead
    --- End diff --
    
    Is this a leftover TODO? The comment says "another API to cleanup with everything is dead" — should that separate cleanup API be implemented here, or should the comment be reworded/removed before merge?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message