# Planet Primates

## June 27, 2016

### StackOverflow

#### Probability outputs for SVM(SMO) in WEKA

I have two input classes, 0 and 5. I have trained my classifier using these two labels on the training data.

While testing, before being clearly assigned to every class, I wish to obtain the probabilities that the SVM calculates for every instance.

When I use classifier.distributionForInstance(), I am only obtaining 0.000 and 1.000 values pertaining to every instance. Eg. [0 : 1.000][5 : 0.000][0 : 1.000][5 : 0.000]

While creating the relation for the arff file, I defined the label attribute as follows: bw.write("@attribute label {0,5}");, where bw is a BufferedWriter object.

What should be the procedure for obtaining the intermediate probability values of the SVM?

#### InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [1000,625]

I get the above unexpected error when trying to run this code:

# -*- coding: utf-8 -*-
"""
Created on Fri Jun 24 10:38:04 2016

@author: andrea
"""

# pylint: disable=missing-docstring
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time

from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf
from pylab import *
import argparse
import mlp

# Basic model parameters as external flags.
tf.app.flags.FLAGS = tf.python.platform.flags._FlagValues()
tf.app.flags._global_parser = argparse.ArgumentParser()
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('max_steps', 20, 'Number of steps to run trainer.')
flags.DEFINE_integer('batch_size', 1000, 'Batch size. Must divide evenly into the dataset sizes.')
flags.DEFINE_integer('num_samples', 100000, 'Total number of samples. Needed by the reader')
flags.DEFINE_string('training_set_file', 'godzilla_dataset_size625', 'Training set file')
flags.DEFINE_string('test_set_file', 'godzilla_testset_size625', 'Test set file')
flags.DEFINE_string('test_size', 1000, 'Test set size')

def placeholder_inputs(batch_size):

images_placeholder = tf.placeholder(tf.float32, shape=(batch_size, mlp.NUM_INPUT))
labels_placeholder = tf.placeholder(tf.float32, shape=(batch_size, mlp.NUM_OUTPUT))
return images_placeholder, labels_placeholder

def fill_feed_dict(data_set_file, images_pl, labels_pl):

for l in range(int(FLAGS.num_samples/FLAGS.batch_size)):
data_set = genfromtxt("../dataset/" + data_set_file, skip_header=l*FLAGS.batch_size, max_rows=FLAGS.batch_size)
data_set = reshape(data_set, [FLAGS.batch_size, mlp.NUM_INPUT + mlp.NUM_OUTPUT])
images = data_set[:, :mlp.NUM_INPUT]
labels_feed = reshape(data_set[:, mlp.NUM_INPUT:], [FLAGS.batch_size, mlp.NUM_OUTPUT])
images_feed = reshape(images, [FLAGS.batch_size, mlp.NUM_INPUT])

feed_dict = {
images_pl: images_feed,
labels_pl: labels_feed,
}

yield feed_dict

images = data_set[:, :mlp.NUM_INPUT]
labels_feed = reshape(data_set[:, mlp.NUM_INPUT:], [data_set.shape[0], mlp.NUM_OUTPUT])
images_feed = reshape(images, [data_set.shape[0], mlp.NUM_INPUT])

feed_dict = {
images_pl: images_feed,
labels_pl: labels_feed,
}

return feed_dict, labels_pl

def run_training():

tot_training_loss = []
tot_test_loss = []
tf.reset_default_graph()
with tf.Graph().as_default() as g:
images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)
test_images_pl, test_labels_pl = placeholder_inputs(FLAGS.test_size)
logits = mlp.inference(images_placeholder)
test_pred = mlp.inference(test_images_pl, reuse=True)
loss = mlp.loss(logits, labels_placeholder)
test_loss = mlp.loss(test_pred, test_labels_pl)
train_op = mlp.training(loss, FLAGS.learning_rate)

#summary_op = tf.merge_all_summaries()

init = tf.initialize_all_variables()

saver = tf.train.Saver()
sess = tf.Session()
#summary_writer = tf.train.SummaryWriter("./", sess.graph)

sess.run(init)
test_feed, test_labels_placeholder = reader(FLAGS.test_set_file, test_images_pl, test_labels_pl)

# Start the training loop.
for step in xrange(FLAGS.max_steps):
start_time = time.time()
feed_gen = fill_feed_dict(FLAGS.training_set_file, images_placeholder, labels_placeholder)
i=1
for feed_dict in feed_gen:
_, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
_, test_loss_val = sess.run([test_pred, test_loss], feed_dict=test_feed)
tot_training_loss.append(loss_value)
tot_test_loss.append(test_loss_val)
#if i % 10 == 0:
#print('%d minibatches analyzed...'%i)
i+=1

if step % 1 == 0:
duration = time.time() - start_time
print('Epoch %d (%.3f sec):\n training loss = %f \n test loss = %f ' % (step, duration, loss_value, test_loss_val))

predictions = sess.run(test_pred, feed_dict=test_feed)
savetxt("predictions", predictions)
savetxt("training_loss", tot_training_loss)
savetxt("test_loss", tot_test_loss)
plot(tot_training_loss)
plot(tot_test_loss)
figure()
scatter(test_feed[test_labels_placeholder], predictions)

#plot([.4, .6], [.4, .6])

run_training()

#if __name__ == '__main__':
#  tf.app.run()

this is mlp:

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import tensorflow as tf

NUM_OUTPUT = 1
NUM_INPUT = 625
NUM_HIDDEN = 5

def inference(images, reuse=None):
with tf.variable_scope('hidden1', reuse=reuse):
weights = tf.get_variable(name='weights', shape=[NUM_INPUT, NUM_HIDDEN], initializer=tf.contrib.layers.xavier_initializer())
weight_decay = tf.mul(tf.nn.l2_loss(weights), 0.00001, name='weight_loss')
biases = tf.Variable(tf.constant(0.0, name='biases', shape=[NUM_HIDDEN]))
hidden1_output = tf.nn.relu(tf.matmul(images, weights)+biases, name='hidden1')

with tf.variable_scope('output', reuse=reuse):
weights = tf.get_variable(name='weights', shape=[NUM_HIDDEN, NUM_OUTPUT], initializer=tf.contrib.layers.xavier_initializer())
weight_decay = tf.mul(tf.nn.l2_loss(weights), 0.00001, name='weight_loss')
biases = tf.Variable(tf.constant(0.0, name='biases', shape=[NUM_OUTPUT]))
output = tf.nn.relu(tf.matmul(hidden1_output, weights)+biases, name='output')

return output

def loss(outputs, labels):

rmse = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(labels, outputs))), name="rmse")

def training(loss, learning_rate):

tf.scalar_summary(loss.op.name, loss)
global_step = tf.Variable(0, name='global_step', trainable=False)
train_op = optimizer.minimize(loss, global_step=global_step)
return train_op

here the error:

Traceback (most recent call last):

File "<ipython-input-1-f16dfed3b99b>", line 1, in <module>
runfile('/home/andrea/test/python/main_mlp_yield.py', wdir='/home/andrea/test/python')

File "/usr/local/lib/python2.7/dist-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 714, in runfile
execfile(filename, namespace)

File "/usr/local/lib/python2.7/dist-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 81, in execfile
builtins.execfile(filename, *where)

File "/home/andrea/test/python/main_mlp_yield.py", line 127, in <module>
run_training()

File "/home/andrea/test/python/main_mlp_yield.py", line 105, in run_training
_, test_loss_val = sess.run([test_pred, test_loss], feed_dict=test_feed)

File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 372, in run

File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 636, in _run

File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 708, in _do_run

File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 728, in _do_call
raise type(e)(node_def, op, message)

InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [1000,625]
[[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[1000,625], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op u'Placeholder', defined at:
File "/usr/local/lib/python2.7/dist-packages/spyderlib/widgets/externalshell/start_ipython_kernel.py", line 205, in <module>
__ipythonkernel__.start()
File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.py", line 442, in start
ioloop.IOLoop.instance().start()
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/ioloop.py", line 162, in start
super(ZMQIOLoop, self).start()
File "/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py", line 883, in start
handler_func(fd_obj, events)
File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 276, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 391, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/ipkernel.py", line 199, in do_execute
shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2723, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2831, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2885, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-1-f16dfed3b99b>", line 1, in <module>
runfile('/home/andrea/test/python/main_mlp_yield.py', wdir='/home/andrea/test/python')
File "/usr/local/lib/python2.7/dist-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 714, in runfile
execfile(filename, namespace)
File "/usr/local/lib/python2.7/dist-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 81, in execfile
builtins.execfile(filename, *where)
File "/home/andrea/test/python/main_mlp_yield.py", line 127, in <module>
run_training()
File "/home/andrea/test/python/main_mlp_yield.py", line 79, in run_training
images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)
File "/home/andrea/test/python/main_mlp_yield.py", line 37, in placeholder_inputs
images_placeholder = tf.placeholder(tf.float32, shape=(batch_size, mlp.NUM_INPUT))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/array_ops.py", line 895, in placeholder
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 1238, in _placeholder
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/op_def_library.py", line 704, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2260, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1230, in __init__
self._traceback = _extract_stack()

I really don't understand why. It looks to me that I'm feeding all the placeholders before using them. I also removed the "merge_all_summaries" call, since this problem is similar to others (this and this), but it didn't help.

### CompsciOverflow

#### Online sorting without modifications

There is an array with $n$ places. There is a stream of $n$ unique numbers that arrive at a random order (permutation selected uniformly at random).

Whenever a number arrives, we must put it somewhere in the array, and we are not allowed to move it later. The goal is to have as many numbers as possible (in expectation) in their correct location in the final array. The "correct location" is defined as the location where it would appear if the numbers were sorted.

What is known about this problem? What is the best expected success rate, and what algorithm attains it?

Notes:

• A related question is: What is the fastest online sorting algorithm? . It discusses a situation in which items can be moved when new items arrive, and the goal is to minimize the running time.

• I am mainly interested in maximizing the expected number of the correct positions. However, it is also interesting if there is a way to improve the probability that all numbers are in the correct position, above O(1/n!)

### Fefe

#### Lesetipp: Fleischhauer über Brexit. Money Quote:Überall ...

Lesetipp: Fleischhauer über Brexit. Money Quote:
Überall kann man jetzt lesen, viele Briten hätten gar nicht gewusst, wogegen sie stimmen und würden ihre Entscheidung gerne rückgängig machen. Doch das ist Teil des therapeutischen Programms, das die Medien hierzulande zur psychologischen Entlastung ihrer Leser anbieten. Auch der Mythos, die Alten hätten den Jungen die Zukunft gestohlen, lässt sich leicht widerlegen. Bei den 18- bis 24-Jährigen lag laut Sky Data die Wahlbeteiligung bei 36 Prozent. Wenn überhaupt, dann haben die Jungen die Zukunft verpennt.

### StackOverflow

#### Studying fluctuations in time series

I have some time series to analyze.
Given the domain the data is coming from -

• Time series is supposed to have some fluctuations.
• A regular periodicity might not be present at all in some cases. There might be some irregular periods of droughts (no fluctuations happening at all)
• These fluctuations may be a part of an overall down/up trend.

I am trying to avoid modeling techniques like ARIMA etc. since I am only interested in knowing the following features for each one of them:

• Average amplitude of fluctuations.
• Average time period of fluctuations (how long it takes for values to rise and fall back to almost same level?).
• Average frequency of fluctuations. After what period do these fluctuations occur?

Following is what some of the data looks like:

The approach I am taking is to -

First build some sort of annotation on the time-axis (e.g. flat, increasing, decreasing)
Then based on these tags study further the patterns to answer the above questions.
In case there is an overall up/down trend in the series I am de-trending it by removing mean/linear-fit, etc.

I was wondering if there is any other approach or technique to answer the above mentioned questions for my data.

#### xgboost predict method returns the same predicted value for all rows

I've created an xgboost classifier in Python:

train is a pandas dataframe with 100k rows and 50 features as columns. target is a pandas series

objective='reg:linear', n_estimators=100)
xgb_classifier = xgb_classifier.fit(train, target)

predictions = xgb_classifier.predict(test)

However, after training, when I use this classifier to predict values the entire results array is the same number. Any idea why this would be happening?

Data clarification: ~50 numerical features with a numerical target

I've also tried RandomForest Regression from sklearn with the same data and it does give realistic predictions. Perhaps a legitimate bug in the xgboost implementation?

#### Product Recommendation

I want to build a product recommender for a given user using Azure ML. The data set I have contains

ProductId CustomerId
234       01
236       01
235       02
...       ...

The main problem I am facing is that all the resources available on the net only explain how to build a recommender when ratings are available. Is there any way to build a recommender without them?

### StackOverflow

#### zip_longest without fillvalue

I am searching for a middle ground between Python's zip and zip_longest functions (from the itertools module), that exhausts all given iterators, but does not fill in anything. So, for example, it should transpose tuples like so:

(11, 12, 13    ),        (11, 21, 31, 41),
(21, 22, 23, 24),  -->   (12, 22, 32, 42),
(31, 32        ),        (13, 23,     43),
(41, 42, 43, 44),        (    24,     44)

(Spaces added for nicer graphical alignment.)

I managed to compose a crude solution by cleaning out the fill values after zip_longest.

return map(
partial(filter, partial(ne, sentinel)),
zip_longest(*iterables, fillvalue=sentinel))

Is there a way to do this without introducing the sentinels to begin with? Can this be improved using yield? Which approach seems most efficient?

### QuantOverflow

#### Matlab interest rates calibration script

I've got a matlab script from a Matlab webinar about calibration of the G2++ model. This should give the parameters of the G2++ model based on historical yield curves (I know you can use swaption vols but I also want to use this calibration technique).

The script uses the g2ppssm function handle. When I run the code it gives me the error that this function is unknown. I think I need to specify this function but don't know how.

Help is much appreciated. Hereby the script.

EstimationData = %matrix containing historical yield curves InitZeroRates = EstimationData(1,:);

tau = [3/12 6/12 1 2 3 5 7 10 20 30]; %tenors

DeltaT = 1/252; nObs = size(EstimationData,1); G2PPSSM = ssm(@(c)g2ppssm(c,[tau(:) InitZeroRates(:)],tau,DeltaT,nObs,EstimationData));

D0 = .1*ones(1,length(tau));

x0_G2PP = [.4 .1 .15 .007 -.5]; lb_G2PP = [.01 .01 .001 .001 -1]; ub_G2PP = [1 1 1 1 1]; %

[KalmanG2PP,histG2PPParam] = estimate(G2PPSSM,EstimationData,[x0_G2PP D0],... 'lb',[lb_G2PP zeros(1,length(tau))],'ub',[ub_G2PP ones(1,length(tau))],... 'Display','off','univariate',true);

### CompsciOverflow

#### How to prove decidability of halting problem for PDAs with $\epsilon$-transitions?

For LBAs it's rather easy to prove the decidability of the halting problem - as there can only be a finite number of different configurations when using limited space.

But what about PDAs with $\epsilon$-transitions? There, the stack may be infinitely large, and I assume it should be much harder here to see if the PDA is in an infinite loop or not.

I don't think it's as easy as answered here for non-$\epsilon$ PDAs:

[For] DFAs or PDAs, the halting problem is decidable: the machine always halts because it halts when it reaches the end of its input, the input is finite and the machine consumes one character of input at every step.

The answer to my earlier question (Is a PDA's stack size linear bounded in input size?) also seems to point in a direction of a higher difficulty of proving this...

(I don't need an actual proof, a confirmation that it actually is "hard" would suffice!)

### QuantOverflow

Suppose a put option on a stock $S(t)$ following a Geometric Brownian motion is given, with strike $K$ and maturity $T$. Let us denote its price at time $t$ by $p(t,S(t))$. Now, by no-arbitrage considerations, we can easily see that the price at time $t$ of this option is always at least the intrinsic value of the option itself; namely, the following inequality holds: $$p(t,S(t))\geq (K-S(t))^+$$ To see this, suppose for example that the opposite inequality holds, i.e., $p(t,S(t)) < (K-S(t))^+$. If so, then at time $t$ buy the put option, buy the stock and exercise the option. This results in a riskless profit of $K-S(t)-p(t,S(t)) >0$.

So, from this simple no-arbitrage argument, we see that the price of the option must always be at least its intrinsic value. However, at this point I realized something strange: if this is true, why in the world should I exercise my put option before expiry? The inequality seems to indicate that it would be an unwise decision to ever exercise the American put option at time $t$, and so the only right time to exercise an American put option would be at expiry, making the American put like a European put (the same as for the American call case).

And yet, we know that the American put option is not the same as the European one. So in my argument there must be something wrong... but what, exactly?

### StackOverflow

#### Learning curve does it show bias/variance?

How can I conclude from this figure if my SVM model suffers from bias or variance?

#### How does one build "Learning Machines" ?

Machine Learning : Train a model using training data. Model learns from (training) data but stops learning thereafter. Does not continuously learn from its mistakes.

Learning Machine : a machine that predicts, monitors the prediction made, and if the prediction is wrong (or way off its target) the prediction and target are sent back as feedback; the feedback is ingested at once, leading to a new training, before the next prediction occurs.

The way learning machines are built is : by adding a feedback ingestion loop to the machine/model. Look at the figure below:

How does one add a feedback ingestion loop to an existing ML model? I am looking for all the gory details. Any pointers will be of great help.

If you are looking for more explanation on this question, pls read the following blog entry : Machine Learning != Learning Machine

### Lobsters

#### Minoca OS Documentation

Minoca seems to be a new operating system focusing on embedded, written from scratch. I post it here because that alone is interesting. If I were to describe it from a systems perspective, it looks like a simplified NT (similar driver model, API organization, object manager) but focused on the POSIX API.

### QuantOverflow

#### Models crumbling down due to negative (nominal) interest rates

Given that the negative interest rates on a lot of sovereign bonds with maturity under 10 years are trading in the negative (nominal) interest rate territory (recently also the short term EURIBOR has dropped below zero), which are the most striking applications for the models in financial economics/quant finance field?

By that I mean which of the so called "stylized facts" and standard models of modern finance are becoming highly controversial or just plain useless? As a couple of examples which spring to mind are the following (do not necessarily have to do with sovereign bond yields, but the concept of negative (nominal) interest rates as such):

• The CIR interest rates model completely breaks down due to the square root term
• The proof that an American call option written on a non-dividend paying underlying will not be exercised before the maturity is false
• Markowitz selection obviously encounters difficulties incorporating negative yields

What are the other consequences, on let us say, CAPM, APT, M&M or any other model in finance? Which long held beliefs are hurt the most by negative yields?

### StackOverflow

#### Can Convolution Neural Networks(CNNs) be used for real time object classifications and robotic sorting machines?

With basic knowledge in computer vision / digital image processing, I have started work with deep neural networks and CNNs, especially on Caffe and Nvidia DIGITS. My question is with regards to deployment of the models.

Can CNNs be used in, say for instance, a production line robot? These robots often require ms or even sub-ms processing time, as there are other processor overheads such as commands to PLCs etc. Is the current tech at a level where we can use trained CNN models already? Could any of you point me towards materials where I could learn how to deploy and implement a trained, validated and tested model?

#### Incorporating user feedback in production deployed DeepLearning ML model

I have developed a ML model using deep learning (LSTM in keras on top of theano) and deployed it in production environment. This model does a classification (0/1) for a NLP task. The prediction of the model is displayed to users, and the users have the option to give a feedback (if the prediction was right/wrong).

How can I continuously incorporate this feedback in my model? From a UX standpoint you don't want a user to correct/teach the system more than twice/thrice for a specific input; the system should learn fast, i.e. the feedback should be incorporated "fast". (Google Priority Inbox does this in a seamless way)

How does one incorporate the feedback? I have searched a lot on the net but could not find relevant material. Any pointers will be of great help.

Please don't say retrain the model from scratch by including the new data points. That's surely not how Google and Facebook build their smart systems.

To further explain my question — think of Google's spam detector, or their Priority Inbox, or their recent feature of "smart replies". It's a well-known fact that they are built using deep nets and that they have the ability to learn from / incorporate (fast) user feedback.

All the while it incorporates the user feedback fast (i.e. the user has to teach the system the correct output at most 2-3 times per data point, and the system starts to give the correct output for that data point) AND it also ensures it maintains old learnings and does not start to give wrong outputs on older data points (where it was giving the right output earlier) while incorporating the learning from the new data point.

I have not found any blog/literature/discussion w.r.t how to build such systems - An intelligent system that uses Deeplearning and can incorporate user feedback in "fast" way

Hope my question is little more clear now.

Update: Some related questions I found are:

Update: I still don't have a concrete answer, but such a recipe does exist. Read the section "Learning from the feedback" in the following blog: Machine Learning != Learning Machine. In it, Jean talks about "adding a feedback ingestion loop to the machine".

### CompsciOverflow

#### Does there exist context-free grammar with words of length n^2 or n^3?

Does there exist context-free grammar with words of length $n^2$ or $n^3$? I can't see any, we can produce all grammar with words of length $n$ ($S \to Se$), but then it seems to be impossible to substitute each $S$ with sequence of $S$'s of length $n$.

### infra-talk

#### Are You Smarter Than the Compiler?

So, are you? It’s extremely unlikely, but you probably knew that. I recently questioned whether I could refactor away a branch in some arithmetic code, and I had a fun time rediscovering first-hand that I am not, in fact, smarter.

## The Scenario

On my current project, I was working on some code that would take an integer (from 0-23) representing an hour of the day and format it for display to the user. This being the United States, we wanted to display it in 12-hour time. I’m sure you’re already imagining the implementation of a function such that f(23)=11, f(1)=1, f(0)=12, and so on.

The easiest implementation you’re thinking of probably looks like this:

/* Map a 24-hour value (0-23) onto the 12-hour clock: 0 -> 12, 13 -> 1, etc. */
int normalize(int h) {
    int r = h % 12;
    /* Hour 0 (and 12) has remainder 0 but must display as 12. */
    return r == 0 ? 12 : r;
}

…or, at least it did for me.

As a software consultant, my sharpest skills are in the expression of logic. The value I create is largely derived from my ability to analyze my clients’ business domain and synthesize from it logical machinery that is useful, succinctly expressed, maintainable, and—not least of all—correct. I don’t often run into situations where performance is a concern.

Despite this, I know (only) a few things about how computers work. I can tell you that modern processors implement a pipelining architecture, where they achieve greater performance by attempting to execute multiple machine instructions at the same time, or even in different orders. I also know that branches, such as that if (h == 0) above, pose an interesting challenge to the processor.

What if the processor is trying to dispatch the conditional branch (if (h == 0)), but it has not yet finished computing the new value of h from the previous step? The CPU will guess. Perhaps it’ll decide that h is probably going to be zero, so it’ll continue along and set h to 12. Disastrously, if the processor comes to realize that h % 12 did not yield zero, it will effectively have to throw away the work it did, jump back to the if statement, and start over.

I also know this code will be running on a phone—potentially very frequently—as a user quickly scrolls through items in a table view. Furthermore, my mathematical intuition tells me I should be able to express this function using another modulus instead of a branch, which would keep the processor’s pipeline chugging along happily. I wondered aloud whether it was worth my time to bother refactoring.

## The Experiment

This is when Job, my partner on the project, chimed in. As I expected, he said there’s no way it could be worth our time, as modern compilers should be able to see right through such simple code. Regardless, we were both feeling exceptionally curious about what the compiler would actually do, and we couldn’t just leave it at that. So Job introduced me to gcc.godbolt.org, and we began to look at the assembly output for two implementations of this function. In particular, this is the code that would be generated by LLVM Clang 3.8 with the -Os flag.

 int branchingNormalize(int h) { h = h % 12; if (h == 0) h = 12; return h; } int modulusNormalize(int h) { return ((h + 11) % 12) + 1; } branchingNormalize(int): movsxd rcx, edi imul rax, rcx, 715827883 mov rdx, rax shr rdx, 63 shr rax, 33 add eax, edx shl eax, 2 lea eax, [rax + 2*rax] sub ecx, eax mov eax, 12 cmovne eax, ecx ret modulusNormalize(int): add edi, 11 movsxd rax, edi imul rcx, rax, 715827883 mov rdx, rcx shr rdx, 63 sar rcx, 33 add ecx, edx shl ecx, 2 lea ecx, [rcx + 2*rcx] sub eax, ecx inc eax ret

## What’s Going On Here?

As Job predicted, these implementations look vastly more similar than dissimilar. They even share the same number of instructions.

Naturally, I first wanted to know where the branch was—if there was one at all. Indeed, there isn’t. Instead, the second-to-last instruction of branchingNormalize is cmovne, which is a conditional move. This might seem to be a petty distinction on the surface, but it’s actually quite significant. This conditional move will end up potentially modifying the eax register, but it would never result in changing which instructions need to be executed next. The processor is quite capable of handling data dependencies between instructions as it parallelizes and reorders them.

Looking beyond that, I was fairly confused about the rest of the instructions; there’s no integer division at all.

From grade school, you may remember how much more difficult it was to divide two numbers than it was to multiply them. You may also remember, from your college computer architecture class, that processors find it similarly more difficult to compute. Personally, I had long buried both of those memories. After all, that’s why I have a computer, and it never seemed to make a fuss about it.

That’s totally okay, as it turns out—Clang has my back. Rather than ask the processor to perform division in pursuit of our remainder, it’s figured out that it can achieve the same thing by utilizing a combination of multiplication, bit shifting, addition, and subtraction. The numbers 715827883, 63, and 33 were picked by the compiler in order to leverage integer overflow, based on the number of bits used to store the integers. Essentially, really cool magic.

## Conclusion

This was an extremely fun diversion, and it’s one of the many examples of why it’s great to work with such smart colleagues. Without Job’s enthusiasm, it’s doubtful I would have actually done this analysis.

At first thought, it made me slightly uncomfortable to see how well the compiler can optimize. If the compiler doesn’t care, should I? What’s the point in paying attention to those small details in my code unless I actually observe a problem?

Upon slightly deeper reflection, however, I find that fear to be silly. I create much more value for the world, and have more fun, when I’m thinking and working at a higher level. I want to be as close as possible to pure logic, not micro-managing (pun intended?) hardware.

The post Are You Smarter Than the Compiler? appeared first on Atomic Spin.

### Planet Theory

#### TR16-100 | A Satisfiability Algorithm for Depth Two Circuits with a Sub-Quadratic Number of Symmetric and Threshold Gates | Suguru Tamaki

We consider depth 2 unbounded fan-in circuits with symmetric and linear threshold gates. We present a deterministic algorithm that, given such a circuit with $n$ variables and $m$ gates, counts the number of satisfying assignments in time $2^{n-\Omega\left(\left(\frac{n}{\sqrt{m} \cdot \poly(\log n)}\right)^a\right)}$ for some constant $a>0$. Our algorithm runs in time super-polynomially faster than $2^n$ if $m=O(n^2/\log^b n)$ for some constant $b>0$. Previously, such algorithms were only known for bounded depth circuits with linear threshold gates and a slightly super-linear number of wires [Impagliazzo-Paturi-Schneider, FOCS 2013 and Chen-Santhanam-Srinivasan, CCC 2016]. We also show that depth 2 circuits with $O(n^2/\log^b n)$ symmetric and linear threshold gates in total cannot compute an explicit function computable by a deterministic $2^{O(n)}$-time Turing machine with an NP oracle. Previously, even slightly super-linear lower bounds on the number of gates were not known until recently Kane and Williams [STOC 2016] showed that depth 2 linear threshold circuits with $o(n^{3/2}/\log^3 n)$ gates cannot compute an explicit function computable in linear time.

### QuantOverflow

#### Compare two time series with different frequencies

Let's say I have two time series $X_t$ and $Y_{t,q}$. As an example, let's say $X_t$ is a series that measures year-over-year changes in the level of output of a good (say, number of widgets). So $X_t = \frac{Widgets_t}{Widgets_{t-1}} - 1$. I have another series $Y_{t,q}$ that is quarterly and measures changes in the number of workers for the company (and I would like to use this series because I think that the changes in workers $w_{t,q}$ would be indicative of the change in the number of widgets sold). The quarterly series would hopefully provide a good indicator.

How would I actually best compare the two series?

1. I could take the mean of the quarters of a given year for $Y_t$ so then I would get $Y^{mean}_t = \frac{1}{4}\sum \limits_{i=1}^4Y_{t,i}-1=\frac{1}{4} \left( \frac{w_{t,1}}{w_{t-1,4}}+\frac{w_{t,2}}{w_{t,1}}+\frac{w_{t,3}}{w_{t,2}}+\frac{w_{t,4}}{w_{t,3}} \right) - 1$

2. Or alternatively, I could take a geometric mean. $Y^{geomean}_t = \left( \prod \limits_{i=1}^4Y_{t,i} \right)^{1/4}=\left( \frac{w_{t,4}}{w_{t-1,4}}\right)^{\frac{1}{4}} - 1$

Both don't seem like the most ideal way, since the mean method measures more of an intra-year change and the geometric mean measures last-quarter-of-the-year changes.

### StackOverflow

#### How to build Neural Network model using word vectors in R? [on hold]

I have word vectors in following format:

in    0.031   0.097      0.843       -0.019      0.296       0.228
is   -0.112   0.144      0.332       -0.689      0.289      0.216

I have calculated word vectors using Glove package in R. Now I want to build neural network in R using these word vectors as input. Please suggest me any package in R or function.

### CompsciOverflow

#### I know the algorithms, but i still don't know how to approach the questions

I am studying graph analysis by myself and I understood most of the material just fine. But there is one huge problem with my approach that prevents me from solving tests. I don't know how to build new graphs based on requirements, and I'll give an example:

Question: G = (V,E) is a directed graph with no cycles (DAG) and w: E->R for each edge. There is an efficient algorithm that, given vertices s and t in the graph, finds the minimum-weight path containing at least 3 edges. So far so good — I understand the question just fine.

Step 1: For each vertex v in V, we mark: v1 = (v, 1), v2 = (v, 2), v3 = (v, 3), v4 = (v, 4)

Given G = (V, E), we'll build new graph G'=(V',E') where: V'={v1, v2, v3, v4| for each v in V}

E' = {(v1, u2), (v2, u3), ____(1)____}

for each (ui, vj) in E', w(e) = {w(u, v)| (u, v) in E and (ui, vj) in E'}

(1) = ?

a. (v3, u4), (v4, u4) - the correct answer

b. (v3, u3)

c. (v3, u4),(v4, u3)

d. (v3, u4)

step 2: Apply DAG algorithm from s1.

step 3: return the weight of the path from s1 to t4.

I know how DAG works, but i don't know how to build the new graph in order to see it, how do i build the new graph?

### StackOverflow

#### Any sources for learning nasm assembly programming in Windows? [on hold]

I would like to learn Assembly Programming for Windows using nasm assembler. But I can't find the good tutorial in online ,I found all material for linux not for windows and that for masm, Is there any tutorial or ebook for windows on nasm assembler?

#### Statistical approach to chess?

Reading about how Google solves the translation problem got me thinking. Would it be possible to build a strong chess engine by analysing several million games and determining the best possible move based largely (completely?) on statistics? There are several such chess databases (this is one that has 4.5 million games), and one could potentially weight moves in identical (or mirrored or reflected) positions using factors such as the ratings of the players involved, how old the game is (to factor in improvements in chess theory) etc. Any reasons why this wouldn't be a feasible approach to building a chess engine?

### QuantOverflow

#### Stochastic Calculus Rescale Exercise

I have the following system of SDE's

$dA_t = \kappa_A(\bar{A}-A_t)dt + \sigma_A \sqrt{B_t}dW^A_t \\ dB_t = \kappa_B(\bar{B} - B_t)dt + \sigma_B \sqrt{B_t}dW^B_t$

If $\sigma_B > \sigma_A$ I would consider the volatility $B_t$ to be more volatile than $A_t$ because

$d\langle A_\bullet\rangle_t = \sigma_A^2 B_t dt$ and $d\langle B_\bullet\rangle_t = \sigma_B^2 B_t dt$

Now, if I rescale the process $B$ by $\sigma_A^2$ and define $\sigma_A^2B =\tilde{B}$, I get an equivalent system of SDE's

$dA_t = \kappa_A(\bar{A}-A_t)dt + \sqrt{\tilde{B}_t}dW^A_t \\ d\tilde{B}_t = \kappa_B(\sigma_A^2\bar{B} - \tilde{B}_t)dt + \sigma_A\sigma_B \sqrt{\tilde{B}_t}dW^B_t$

But now the claim "If $\sigma_B > \sigma_A$ I would consider the volatility $\tilde{B}_t$ to be more volatile than $A_t$" does not hold anymore. Consider $1>\sigma_B>\sigma_A$ and

$d\langle A_\bullet\rangle_t = \tilde{B}_t dt$ and $d\langle \tilde{B}_\bullet\rangle_t = \sigma_A^2\sigma_B^2 \tilde{B}_t dt$.

In this case the volatility $\tilde{B}$ of $A$ is more volatile than $A$ only if $\sigma_A^2\sigma_B^2>1$, which is completely different from the condition above ($\sigma_B > \sigma_A$).

What went wrong? Is there some error in the rescaling?

### StackOverflow

#### What is the difference between partial fit and warm start?

Context:

I am using Passive Aggressor from scikit library and confused whether to use warm start or partial fit.

Efforts hitherto:

https://github.com/scikit-learn/scikit-learn/issues/1585

1. Gone through the scikit code for _fit and _partial_fit.

My observations:

1. _fit in turn calls _partial_fit.

2. When warm_start is set, _fit calls _partial_fit with self.coef_

3. When _partial_fit is called without coef_init parameter and self.coef_ is set, it continues to use self.coef_

Question:

I feel both are ultimately providing the same functionality. Then, what is the basic difference between them? In which contexts is either of them used?

Am I missing something evident? Any help is appreciated!

### Fefe

#### Old and busted: Einparkhilfe hilft beim Auto-Einparken.New ...

Old and busted: Einparkhilfe hilft beim Auto-Einparken.

New hotness: Strafmaßhilfe hilft beim Verurteilen!

### StackOverflow

#### How to check trained neural net accuracy after testing it?

I trained a neural network with an Input matrix of 792x4415 and a target of 80x4415.

net = patternnet(10);
[net,tr] = train(net,Input,target);

After training, I tested it with a test matrix of size 792x204

result = sim(net,Test);

I get a 80x204 result matrix. Now one way is to see the matrix manually and to see for each image for which class out of 80, it has the highest number. But how can I calculate the accuracy in terms of number of correctly/incorrectly classified classes for this test matrix?

I tried using

[c,cm] = confusion(target,result);
fprintf('Percentage Correct Classification for Unknown Data   : %f%%\n', 100*(1-c));
fprintf('Percentage Incorrect Classification for Unknown  : %f%%\n', 100*c);

But I get the following error

Targets and outputs have different dimensions.

I know the test matrix has 204 examples instead of 4415, and that's why I get the error, but I don't have that many test examples to test the model with. Is there a way I can use the confusion function, or any other function, to calculate the accuracy using this test matrix?

UPDATE

I'm trying to do character recognition. I have 80 characters in total and the total number of image examples for all the characters are 4415, hence my Input and target matrices are of size 792x4415 and 80x4415 respectively. After the training is done, I'm using 204 test images and my test matrix is of size 792x204.

### CompsciOverflow

#### What's Mac Os host?

As I am trying to install VirtualBox on Mac OS X, I had gone into the VirtualBox website with given a list of Window hosts, Mac Os hosts, Linux hosts, and Solaris hosts? Can anyone guide me through this because I don't understand it at all? My goal is to install VirtualBox then install Ubuntu in VirtualBox.

### StackOverflow

#### What type of ML is this? Algorithm to repeatedly choose 1 correct candidate from a pool (or none)

I have a set of 3-5 black box scoring functions that assign positive real value scores to candidates.

Each is decent at ranking the best candidate highest, but they don't always agree. I'd like to find how to combine the scores together for an optimal meta-score such that, among a pool of candidates, the one with the highest meta-score is usually the actual correct candidate.

So they are plain R^n vectors, but each dimension individually tends to have a higher value for correct candidates. Naively I could just multiply the components, but I hope there's something more subtle to benefit from.

If the highest score is too low (or perhaps the two highest are too close), I just give up and say 'none'.

So for each trial, my input is a set of these score-vectors, and the output is which vector corresponds to the actual right answer, or 'none'. This is kind of like tech interviewing where a pool of candidates are interviewed by a few people who might have differing opinions but in general, each tend to prefer the best candidate. My own application has an objective best candidate.

I'd like to maximize correct answers and minimize false positives.

More concretely, my training data might look like many instances of

{[0.2, 0.45, 1.37], [5.9, 0.02, 2], ...}  ->  i

Where i is the ith candidate vector in the input set.

So I'd like to learn a function that tends to maximize the actual best candidate's score vector from the input. There are no degrees of bestness. It's binary right or wrong. However, it doesn't seem like traditional binary classification because, among an input set of vectors, there can be at most 1 "classified" as right, the rest are wrong.

### Lobsters

#### What are you working on this week?

This is the weekly thread to discuss what you have done recently and are working on this week.

### Fred Wilson

#### AVC Demographics

If I was going to start selling ads on AVC (I am not), here are the advertising categories I should target according to Google Analytics:

Nothing too surprising in here but it is interesting that entertainment (Film and TV) scored so high. I guess you don’t spend all of your time reading AVC

### Fefe

#### Setzt hier jemand Lenovo-Geräte ein?

Setzt hier jemand Lenovo-Geräte ein?

#### Kurze Durchsage des Papstes (auf die Frage, ob die ...

Kurze Durchsage des Papstes (auf die Frage, ob die katholische Kirche sich bei Homosexuellen entschuldigen müsse):
"I think the church must not only apologize … to a gay person it offended, but we must apologize to the poor, to women who have been exploited, to children forced into labor, apologize for having blessed so many weapons" and for having failed to accompany families who faced divorces or experienced other problems.

Burying-Liste:

### StackOverflow

#### Sklearn PCA/TSNE giving out same X and Y values for each sample

I am using PCA and TSNE for dimensionality reduction. My data has 400 dimesions and I want to visualize it and hence bringing it to 2-dimesions. However, the output for PCA and TSNE has the same X-Y value for all samples. Can someone tell what's probably going wrong? Code and PCA & TSNE results attached (as you will notice both X and Y have the same value for all samples.).

import numpy as np
from sklearn.decomposition import PCA
from sys import argv
import ast

# Reduce high-dimensional vectors (one Python-literal list per line of the
# input file) to 2-D with PCA and write the coordinates as "x,y" lines.
vector_filename = argv[1]

# Py2-only `file()` builtin replaced with `open`; `with` guarantees close.
with open(vector_filename) as vector_file:
    vectors = [ast.literal_eval(line.strip()) for line in vector_file]

pca = PCA(n_components=2) # tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
low_dim_embs = pca.fit_transform(vectors) # low_dim_embs = tsne.fit_transform(train)

with open("2snippets_reduced_dim_file.tsv", "w+") as reduced_dim_file:
    for row in low_dim_embs:
        # BUG FIX: the original wrote row[0] for BOTH columns, which is why
        # every sample appeared with identical X and Y values. Write the
        # second principal component (row[1]) as the Y coordinate.
        reduced_dim_file.write("{0},{1}\n".format(row[0], row[1]))

PCA Results:

-0.00675654299013,-0.00675654299013
0.224296213823,0.224296213823
1.19994839523,1.19994839523
-1.41748806607,-1.41748806607

TSNE Results:

29.5389019637,29.5389019637
203.414562899,203.414562899
-85.2939343045,-85.2939343045
-151.961534603,-151.961534603

#### What is tail recursion?

Whilst starting to learn lisp, I've come across the term tail-recursive. What does it mean?

### CompsciOverflow

#### Turing Completeness of System Which Randomly Fails to Complete Calculations

If one were to create a variant of a turing complete language which upon completing a calculation randomly changes the answer by one, would it be Turing complete? For example, say I had a Python implementation which, upon every variable assignment, with probability $\frac13$ decreased the value by $1$, with probability $\frac13$ increased the value by $1$ and otherwise kept the value constant, would it be Turing complete? This seems to be able to perform all of the calculations which Python is able to perform, just not reliably.

#### Elder Scrolls Skyrim :V Crashes On Windows 10 [on hold]

I was playing Skyrim version 1.1 on Windows 10 and everything was fine. Then I updated the game to the latest version (1.9), and now when I double-click on the Launcher or TESV.exe, just nothing happens (the game doesn't pop up). I have tried updating my GPU drivers and modifying my game files, but no luck. Please help.

#### Decidability Turing Machines

Let $\Sigma$ be an alphabet, and suppose that $A$, $B \subseteq \Sigma^*$ are Turing recognizable languages where both $A \cup B$ and $A \cap B$ are decidable. Prove that $A$ is decidable.

Is this true? I have tried to prove it by considering the intersection and complement of the sets, but I think I am missing something.

### StackOverflow

#### Does word2vec make sense for supervised learning?

I have a list of sentence/label pairs to train the model, how should I encode the sentences as input to, say an SVM?

### CompsciOverflow

#### Use of sorting in counterexamples for equations

I came across a question which asked how sorting would help in searching for counterexamples to the conjecture that $$u^6 + v^6 + w^6 + x^6 + y^6 = z^6$$ has no non trivial solutions in integers.

The answer said to make two files containing values of $u^6 + v^6 + w^6 \pmod W$ and $z^6 - y^6 - x^6 \pmod W$. $W$ is the word size of the computer. Sort these, search for duplicates and go for further steps.

Can someone explain what these further steps would be in detail ?

### QuantOverflow

#### GARCH variance vs standard deviation for volatility

in my series of questions related to GARCH and volatility I finally think I've got a decent grasp on it. You guys have been great help clearing up my questions for me.

My next question is just a confirmation of my suspicion. It's well known that in finance, volatility is typically understood to be the standard deviation of returns. However, GARCH analysis helps you forecast the conditional variance of a process.

Suppose I have an ARIMA-GARCH forecast for the log returns of a series. GARCH gives us the equations:

$$y_t = x'_t + \epsilon_t$$ $$\epsilon_t|\psi_t \sim N(0, \sigma_t^2)$$ $$\sigma_t^2 = \omega + \alpha_1\epsilon_{t-1}^2 + ... + \alpha_q\epsilon_{t-q}^2 + \beta_1\sigma_{t-1}^2 + ... + \beta_p\sigma_{t-p}^2$$

These equations define the variance at time $t$, $\sigma^2_t$.

If my forecast returns a value of $0.05$ for the 1 step ahead forecast then I can simply take the square root of the forecast to get the conditional volatility - correct? So in this case the 1 step ahead forecast of volatility is:

$$\sqrt{0.05} = 0.1732$$

This seems correct to me, but I am having trouble finding people who are doing this and I want to make sure that this is sound.

Thank you!

### CompsciOverflow

#### Is the complement of { ww | ... } context-free?

Define the language $L$ as $L = \{a, b\}^* - \{ww\mid w \in \{a, b\}^*\}$. In other words, $L$ contains the words that cannot be expressed as some word repeated twice. Is $L$ context-free or not?

I've tried to intersect $L$ with $a^*b^*a^*b^*$, but I still can't prove anything. I also looked at Parikh's theorem, but it doesn't help.

### StackOverflow

#### What's the difference between a lens and a partial lens?

A "lens" and a "partial lens" seem rather similar in name and in concept. How do they differ? In what circumstances do I need to use one or the other?

Tagging Scala and Haskell, but I'd welcome explanations related to any functional language that has a lens library.

### CompsciOverflow

#### Solving $tr(X^{-1}Y)$ quickly

I know that one can solve the equation $X^{-1}\mathbf{v}$ quickly with conjugate gradient method. Is there a similar approach for solving $tr(X^{-1}Y)$ (similarly interesting to me are $tr(X^{-1}), tr(X^{-1}YX^{-1}Y))$?

### StackOverflow

#### opencv cascade classifier detects background

I have been using cascade classifier to train some kind of plants. Here is a sample image for what I want to detect

I sampled the little green plants for positives, and made negatives out of images with similar background and no green plants (as suggested by many sources). Used many images similar to this one for sampling.

I did not have a lot of training data so of course I did not expect some idealistic classification results.

I have set the usual parameters min_hit_rate 0.95 max_false_alarm 0.5 etc. I have tried training with 5,6,7,8,9 and 10 stages. The strange thing that happens to me is that during the training process I get hit rate of 1 during all stages, and after 5 stages I get good acceptance ratio 0.004 (similar for later stages 6,7,8...). I tried testing my classifier on the same image which I used for the training samples and there is very illogical behavior:

1. the classifier detects almost everything BUT the positive samples i took from it (the same samples in the training with HIT RATION EQUAL TO 1).
2. the classifier is really but really slow it took over an hour for single input image (down-sampled scale factor 1.1).

I do not get it how could the same samples be classified as positives during training (through all the stages) and then NONE of it as positive on the image (there are a lot of false positives around it).

I checked everything a million times (I thought that I somehow mixed positives and negatives but I did not).

Can someone help me with this issue?

### QuantOverflow

#### How to measure the real interest rate using the consumer price index

I am examining how investor sentiment affects the probability of stock market crises. I am using methodology similar to this paper https://ideas.repec.org/p/dij/wpfarg/1110304.html.

One of the control variables in the model is the real interest rate. The authors calculate their measure of the real interest rate as "the money market rate, using the Consumer Price Index" (Table on page 25 of their paper).

I have CPI data for the countries that I am using in my study. However, I am not clear on how to transform Money Market Rate and CPI data into a measure of the real interest rate.

Thanks.

### UnixOverflow

#### separate debug-info files in FreeBSD

In GNU/Linux, there are debug-link and build-id sections in ELF files which shows separate debug-info files name or CRC32 [https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html]. There are also debug packets (e.g. nginx-debug) which can be installed by yum or apt.

I am new to FreeBSD and did not find such debug sections in ELF files or debug-packets. Is there separate debug-info files in FreeBSD?

### CompsciOverflow

#### Is Terra the only system programming language able to compile code at runtime? [migrated]

I stumbled across Terra last year, and the most similar language I can think of is Julia. Terra is inspired by and uses Lua for templating/metaprogramming instead of being derived from e.g. the Lisp or ML families, but like many JITted language implementations it blurs the line between compile-time and run-time.

Are there any other low-level (no VM, small runtime, etc.) programming languages I'm missing that can compile and run code at runtime (besides assembly)? Maybe Forth-likes, or Extempore's xtlang? Terra calls this (multi-)staged programming.

### StackOverflow

#### Does the example for decaying the learning rate in TensorFlow website actually decay the learning rate?

I was reading the decaying learning rate and thought there might be a mistake in the docs and wanted to confirm. It says that the decay equation is:

decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)

however, if global_step = 0 I'd guess there is never a decay, right? However, look at the example:

...
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.1
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
100000, 0.96, staircase=True)
# Passing global_step to minimize() will increment it at each step.
learning_step = (
.minimize(...my loss..., global_step=global_step)
)

It has a global_step = tf.Variable(0, trainable=False) that is set equal to zero. Thus, no decay. Is this correct deduction?

I thought there might be a caveat due to integer division when staircase function is set to true, but even in integer division that still seems that there is no decay. Or is there a misunderstanding of what staircase does?

### StackOverflow

#### How to convert hdf5 format to caffemodel format in caffe?

Basically I am using using caffe for deep learning. Now I finetuned a network for my own dataset and saved my model in hdf5 format which is .h5 extension.

Now I want to classify images using matcaffe. matcaffe understands only .caffemodel format for trained model.
So is there any way to convert hdf5 file to caffemodel?

#### Clustering algorithm in R for missing categorical and numerical values

I want to perform marketing segmentation clustering on a dataset with missing categorical and numerical values in R. I cannot perform k-means clustering because of the missing values.

R version 3.1.0 (2014-04-10)

Platform: x86_64-apple-darwin13.1.0 (64-bit)

Mac OSX 10.9.3 4GB hardrive

Is there a clustering algorithm package in R available that can accommodate a partial fill rate? Looking at scholarly articles on missing values, the researchers create a new algorithm for the special use case and the packages are not available in R. For example, k-means with soft constraints and k-means clustering with partial distance strategy.

I have 36 variables, but here is description of the first 5:

user_id    Age   Gender Household.Income Marital.Status
1   12945           Male
2   12947           Male
3   12990
4   13160   25-34   Male   100k-125k         Single
5   13195           Male    75k-100k         Single
6   13286

### CompsciOverflow

#### System f-sub, how to do type checking?

I was reading that system f-sub (polymorphic lambda calculus with sub-typing) and I was quite confused with its one checking rule called "T-TAPP".

This rule as following (ctx denotes the typing context)

ctx |- t1 : ∀X<:T11. T12    ,    ctx |- T2<:T11
-----------------------------------------------
ctx |- t1 [T2] : [X-->T2]T12

I could not understand how '[x-->T2]T12' will be used (I know it is substitution). This rule appears on page 10 in the following source. I am looking for two type checking examples, in which above inference rule will be applied and at least one example is a case of type checking failure.

Could anyone provide me with concrete examples?
Description of system F-sub

### StackOverflow

#### Test multiple algorithms in one experiment

Is there any way to test multiple algorithms rather than doing it once for each and every algorithm; then checking the result? There are a lot of times where I don’t really know which one to use, so I would like to test multiple and get the result (error rate) fairly quick in Azure Machine Learning Studio.

#### Azure ML Loops through the different tasks

I've got the following data structure in an Azure DB Table:

Client_ID | Customer_ID | Item | Preference_Score

The table can contain different datasets from different clients but the data structure is always the same. Then, the table is imported in Azure ML.

What I need is to repeat the same sequence of tasks in Azure ML for all the Client_ID in the above mentioned table.

So that in the end I will train a single model for each client and score the data of each single client individually and append the scored data and store it again in Azure SQL.

Is there any for each task in Azure ML like in SSIS? What's the best way to do this?

Thanks.

#### Pretraining using word2vec for text classification

Looking at the blog post from: http://www.wildml.com/2015/12/implementing-a-cnn-for-text-classification-in-tensorflow/

For text classification using the convolutional neural network, the author mentions something about word2vec as embeddings.

My question is, if we decide to use word2vec to embed the input to the neural network, what benefit does this add?

### Lobsters

#### What do you do in your spare time?

I enjoy riding my bicycling, traveling, and hiking. What do you find yourself doing when you aren’t glued to work or other responsibilities?

### QuantOverflow

#### How to calculate the JdK RS-Ratio

Anyone have a clue how to calculate the JdK RS-Ratio?

Let's say I want to compare the Relative strength for these:

• EWA iShares MSCI Australia Index Fund

• EWC iShares MSCI Canada Index Fund

• EWD iShares MSCI Sweden Index Fund

• EWG iShares MSCI Germany Index Fund

• EWH iShares MSCI Hong Kong Index Fund

• EWI iShares MSCI Italy Index Fund

• EWJ iShares MSCI Japan Index Fund

• EWK iShares MSCI Belgium Index Fund

• EWL iShares MSCI Switzerland Index Fund

• EWM iShares MSCI Malaysia Index Fund

• EWN iShares MSCI Netherlands Index Fund

• EWO iShares MSCI Austria Index Fund

• EWP iShares MSCI Spain Index Fund

• EWQ iShares MSCI France Index Fund

• EWS iShares MSCI Singapore Index Fund

• EWU iShares MSCI United Kingdom Index Fund

• EWW iShares MSCI Mexico Index Fund

• EWT iShares MSCI Taiwan Index Fund

• EWY iShares MSCI South Korea Index Fund

• EWZ iShares MSCI Brazil Index Fund

• EZA iShares MSCI South Africa Index Fund

Each of them should be compared to the SP500 (SPY index). Calculate the relative strength of each of them to SPY and have it normalized (I think it is the only solution)

### StackOverflow

#### When should one set staircase to True when decaying the learning rate in TensorFlow?

Recall that when exponentially decaying the learning rate in TensorFlow one does:

decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)

the docs mention this staircase option as:

If the argument staircase is True, then global_step / decay_steps is an integer division and the decayed learning rate follows a staircase function.

When is it better to decay every X number of steps and follow a staircase function rather than a smoother version that decays more and more with every step?

### QuantOverflow

#### Monetary Policy and the Yield Curve PART TWO

The Fed has a number of tools/targets with which they manage monetary policy. I'm looking to refine a concise summary of them and looking for guidance/correction/validation.

Think I understand these first three. Please correct me if I'm wrong:

1. Open Market Operations: The Federal Open Market Committee (FOMC) will often instruct the Federal Reserve Bank of New York to engage in open market operations (buying and selling of US securities) to influence interest rates. Movement at all maturities on the yield curve can reflect such operations; the Fed has been known to try and alter the shape/slope of the curve.
2. The Discount Window: offers various types of credit at the discount rate; designed for times of stress; rates are high (penalty rates - see Bagehot's Dictum); use of discount window credit may spark regulator investigation. Discount Window credit is typically overnight (primary/secondary) or less than 9 months in the case of seasonal loans. Changes in discount rate only affects the short end of the yield curve.

3. The Fed Fund rate: overnight rate at which reserve balances, held by banks at the fed can be lent to each other. This rate is calculated from market transactions. The Fed determines their FF rate target and use open market operations to move the Fed Funds rate toward a particular level. Whilst the Fed Fund rate is an overnight rate, it can be related to longer term movements on the yield curve (1-month treasury bill for example) but there are differences; notably the Fed Funds rate, being a market rate, does vary, while the yield on a 1-month Treasury is effectively fixed at the time of purchase. The relationship between the expected values of a fixed rate and a floating rate is expressed through Overnight Indexed Swap values, and 1-month OIS on the Fed Funds rate is the best direct indication of the expected value of compounded overnight borrowing in the Fed Funds market.

I'm looking for further confirmation/understanding on the next two:

1. The reverse repo program, which enables it to set a floor under short-term secured borrowing rates. This makes sense: reverse repo = sell security, collect payment from bank, reduce their fed reserve balance, decrease supply of money in the system and put upwards pressure on the federal funds rate for example. Is this logic correct?

2. The interest rate on excess reserves (IOER); from comments on my prior question, I understand that this rate sets the ceiling for fed funds. IOER = interest paid on balances above the required level; how does that set a ceiling? Sounds more like a floor; for a bank to lend its excess reserves, they would want a higher rate than the IOER?

This is a follow on from part one which was posted here.

### StackOverflow

#### Does R have something equivalent to reduce() in Python?

That is : "Apply function of two arguments cumulatively to the items of sequence, from left to right, so as to reduce the sequence to a single value. "

### Planet Theory

#### There is now a Bounded Discrete Envy Free Cake Cutting Protocol!

Lance: Bill, there is a new result on cake cutting that was presented at STOC! Do you want to blog about it?

Bill: Do snakes have hips! Does a chicken have lips!

Lance:  No to the first one and I don't know to the second one.

Bill: Yes I'll blog about it! Whats the paper?

Lance: Its this paper by Aziz and Mackenzie.

Bill: Oh. That's not new. Five people emailed me about it a while back. But yes I will blog about it.

Cake Cutting: There are n people with different tastes in cake (some like chocolate  and some... OH, who doesn't like chocolate? Okay, someone prefers kale which is on the cake.) They want a protocol that divides the cake in a way that is fair. What is fair? There are many definitions but I'll talk about two of them.

Proportional: Everyone gets 1/n of the cake (in their own opinion- I will omit saying this from now on).

Proportional sounds fair but consider the following scenario: Alice thinks she got 1/3 but she thinks Bob got 1/2 and Eve got 1/6. Alice will envy Bob.

Envy Free: Everyone thinks they have the largest piece (or are tied for it).

What is a protocol? It is a set of instructions and advice so that (1) if the players all follow the advice then the end result is fair, and (2) if a player does not follow the advice then that player might get less than his fair share. Hence all players are motivated to follow the advice. We assume that everyone acts in their own self interest and that they are at a diamond cutters' convention (perhaps co-located with STOC) so they really can cut cake very finely.

We will only consider discrete protocols. We won't define this formally.

Prior Results:
1) There is a protocol for Prop fairness for n people that uses  O(n log n) cuts. See my notes

2) Jeff Edmonds and Kirk Pruhs showed a lower bound of $\Omega(n \log n)$. See their paper.

3) There is a protocol for Envy Free fairness for 3 people due to Conway and Selfridge. This was in 1960. This protocol took 5 cuts. (It is in the doc I point to in next item)

4) In 1995 Brams and Taylor obtained a protocol for envy free fairness for n people. But there is a catch- there is no bound on the number of cuts. For all N there is a way to set four people's tastes so that the protocol takes more than N cuts. See my notes.

All items to follow are for an envy free protocol for n people.

5) It was an open question to determine if there is a bounded protocol. Stromquist proved that there can be no bounded protocol if all of the players got a contiguous piece, though this was not the case in the Brams-Taylor protocol. See his paper

At the time I thought there would be no bounded protocol. I found a way to measure unbounded protocols using ordinals and wrote a paper on it: See my paper.

6) Aziz and  MacKenzie showed there was a bounded protocol for 4 people. See their paper.

7) Aziz and MacKenzie, STOC 2016, showed there was a protocol that takes at most $n^{O(n)}$ cuts. Hence a bounded protocol! See their paper.

What's next? Either improve the number of cuts or show it can't be done!

### CompsciOverflow

#### Why is b-tree search O(log n)?

B-tree is a data structure, which looks like this:

If I want to look for some specific value in this structure, I need to go through several elements in the root to find the right child-node. Then I need to go through several elements in the child-node to find its right child-node, etc.

The point is, when I have $n$ elements in every node, then I have to go through all of them in the worst case. So, we have $O(n)$ complexity for searching in one node.

Then, we must go through all the levels of the structure, and there are $\log_m N$ of them, $m$ being the order of the B-tree and $N$ the number of all elements in the tree. So here, we have $O(\log N)$ complexity in the worst case.

Putting this information together, we should have $O(n) \cdot O(\log n) = O(n \log n)$ complexity.

But the complexity is just $O(log n)$ - why? What am I doing wrong?

### StackOverflow

#### Text Summarization Evaluation - BLEU vs ROUGE

With the results of two different summary systems (sys1 and sys2) and the same reference summaries, I evaluated them with both BLEU and ROUGE. The problem is: All ROUGE scores of sys1 were higher than those of sys2 (ROUGE-1, ROUGE-2, ROUGE-3, ROUGE-4, ROUGE-L, ROUGE-SU4, ...) but the BLEU score of sys1 was less than the BLEU score of sys2 (by quite a lot).

So my question is: Both ROUGE and BLEU are based on n-grams to measure the similarity between the summaries of systems and the summaries of humans. So why are there differences in the evaluation results like that? And what is the main difference between ROUGE and BLEU that explains this issue?

Any advice and suggestions will be greatly appreciated! Thanks!

### QuantOverflow

#### Special term for 'intersection' of option price

Suppose, I have written two ordered lists:

$S_{call}= (\textbf{8000, 8050, 8100}, 8150, 8200, 8250)$ and
$S_{put} = (7850, 7900, 7950, \textbf{8000, 8050, 8100})$.

Entries correspond to strike prices of calls and puts on the same underlying asset XYZ.

Update:

Spot price is equal to $8067.6$, then XYZ 8050 call and XYZ 8050 put are "at-the-money" options, XYZ 8000 call and XYZ 8100 put are "in-the-money" options, and the remaining options would be "out-of-the-money".

How to name strike prices which are marked with bold? Is there a special term?

### arXiv Discrete Mathematics

#### Boosting quantum annealer performance via quantum persistence. (arXiv:1606.07797v1 [quant-ph])

We propose a novel method for reducing the number of variables in quadratic unconstrained binary optimization problems, using a quantum annealer to fix the value of a large portion of the variables to values that have a high probability of being optimal. This method significantly increases the success rate and number of observations of the best known energy value in the sample obtained from the quantum annealer, when compared with calling the quantum annealer without using it, even when using fewer annealing cycles. Use of the method results in a considerable improvement in success metrics even for problems with high-precision couplers and biases. The results are further enhanced by applying the method iteratively and combining it with classical pre-processing. We present results for both Chimera graph-structured problems and embedded problems from a real-world application.

#### Incentive Schemes for Mobile Peer-to-Peer Systems and Free Riding Problem: A Survey. (arXiv:1606.07785v1 [cs.DC])

Mobile peer-to-peer networks are quite prevalent and popular nowadays due to the advent of business scenarios where all the services are going mobile — whether it's to find good restaurants, healthy diet books, making friends, job-hunting, real estate info or cab-sharing, etc. As the mobile users are increasing day by day, peer-to-peer networks are getting bigger and more complex. In contrast to client-server systems, in a peer-to-peer network resource sharing is done on the basis of mutual consent and agreed policies with no central authority and controlling entity. Incentive schemes for P2P networks are devised to encourage participation and to adhere to the policies agreed. P2P services based only on altruistic behaviour of users are facing serious challenges like free riding or the tragedy of the commons. Free riders are the users who consume the bandwidth of the system (perform downloading) but don't show altruistic behaviour (deny uploading) and act as a parasite for the P2P network. To counter the free riding issue many incentive schemes have been suggested by researchers. In this paper we will survey the different incentive schemes and their architectures, keeping an eye on how they handle the challenges of modern P2P networks.

#### Satellite Images Analysis with Symbolic Time Series: A Case Study of the Algerian Zone. (arXiv:1606.07784v1 [cs.DB])

Satellite Image Time Series (SITS) are an important source of information for studying land occupation and its evolution. Indeed, the very large volumes of digital data stored, usually are not ready to a direct analysis. In order to both reduce the dimensionality and information extraction, time series data mining generally gives rise to change of time series representation. In an objective of information intelligibility extracted from the representation change, we may use symbolic representations of time series. Many high level representations of time series have been proposed for data mining, including Fourier transforms, wavelets, piecewise polynomial models, etc. Many researchers have also considered symbolic representations of time series, noting that such representations would potentiality allow researchers to avail of the wealth of data structures and algorithms from the text processing and bioinformatics communities. We present in this work, one of the main symbolic representation methods "SAX"(Symbolic Aggregate Approximation) and we experience this method to symbolize and reduce the dimensionality of a Satellite Image Times Series acquired over a period of 5 years by characterizing the evolution of a vegetation index (NDVI).

#### Polynomial-Time Key Recovery Attack on the Faure-Loidreau Scheme based on Gabidulin Codes. (arXiv:1606.07760v1 [cs.CR])

Encryption schemes based on the rank metric lead to small public key sizes of order of few thousands bytes which represents a very attractive feature compared to Hamming metric-based encryption schemes where public key sizes are of order of hundreds of thousands bytes even with additional structures like the cyclicity. The main tool for building public key encryption schemes in rank metric is the McEliece encryption setting used with the family of Gabidulin codes. Since the original scheme proposed in 1991 by Gabidulin, Paramonov and Tretjakov, many systems have been proposed based on different masking techniques for Gabidulin codes. Nevertheless, over the years all these systems were attacked essentially by the use of an attack proposed by Overbeck.

In 2005 Faure and Loidreau designed a rank-metric encryption scheme which was not in the McEliece setting. The scheme is very efficient, with small public keys of size a few kiloBytes and with security closely related to the linearized polynomial reconstruction problem which corresponds to the decoding problem of Gabidulin codes. The structure of the scheme differs considerably from the classical McEliece setting and until our work, the scheme had never been attacked. We show in this article that this scheme like other schemes based on Gabidulin codes, is also vulnerable to a polynomial-time attack that recovers the private key by applying Overbeck's attack on an appropriate public code. As an example we break concrete proposed $80$ bits security parameters in a few seconds.

#### A Game-Theoretic Approach to Word Sense Disambiguation. (arXiv:1606.07711v1 [cs.AI])

This paper presents a new model for word sense disambiguation formulated in terms of evolutionary game theory, where each word to be disambiguated is represented as a node on a graph whose edges represent word relations and senses are represented as classes. The words simultaneously update their class membership preferences according to the senses that neighboring words are likely to choose. We use distributional information to weigh the influence that each word has on the decisions of the others and semantic similarity information to measure the strength of compatibility among the choices. With this information we can formulate the word sense disambiguation problem as a constraint satisfaction problem and solve it using tools derived from game theory, maintaining the textual coherence. The model is based on two ideas: similar words should be assigned to similar classes and the meaning of a word does not depend on all the words in a text but just on some of them. The paper provides an in-depth motivation of the idea of modeling the word sense disambiguation problem in terms of game theory, which is illustrated by an example. The conclusion presents an extensive analysis on the combination of similarity measures to use in the framework and a comparison with state-of-the-art systems. The results show that our model outperforms state-of-the-art algorithms and can be applied to different tasks and in different scenarios.

#### The Niceness of Unique Sink Orientations. (arXiv:1606.07709v1 [cs.DM])

Random Edge is the most natural randomized pivot rule for the simplex algorithm. Considerable progress has been made recently towards fully understanding its behavior. Back in 2001, Welzl introduced the concepts of \emph{reachmaps} and \emph{niceness} of Unique Sink Orientations (USO), in an effort to better understand the behavior of Random Edge. In this paper, we initiate the systematic study of these concepts. We settle the questions that were asked by Welzl about the niceness of (acyclic) USO. Niceness implies natural upper bounds for Random Edge and we provide evidence that these are tight or almost tight in many interesting cases. Moreover, we show that Random Edge is polynomial on at least $n^{\Omega(2^n)}$ many (possibly cyclic) USO. As a bonus, we describe a derandomization of Random Edge which achieves the same asymptotic upper bounds with respect to niceness.

#### Enforcing Termination of Interprocedural Analysis. (arXiv:1606.07687v1 [cs.PL])

Interprocedural analysis by means of partial tabulation of summary functions may not terminate when the same procedure is analyzed for infinitely many abstract calling contexts or when the abstract domain has infinite strictly ascending chains. As a remedy, we present a novel local solver for general abstract equation systems, be they monotonic or not, and prove that this solver fails to terminate only when infinitely many variables are encountered. We clarify in which sense the computed results are sound. Moreover, we show that interprocedural analysis performed by this novel local solver, is guaranteed to terminate for all non-recursive programs --- irrespective of whether the complete lattice is infinite or has infinite strictly ascending or descending chains.

#### Message-Combining Algorithms for Isomorphic, Sparse Collective Communication. (arXiv:1606.07676v1 [cs.DC])

Isomorphic (sparse) collective communication is a form of collective communication in which all involved processes communicate in small, identically structured neighborhoods of other processes. Isomorphic neighborhoods are defined via an embedding of the processes in a regularly structured topology, e.g., $d$-dimensional torus, which may correspond to the physical communication network of the underlying system. Isomorphic collective communication is useful for implementing stencil and other regular, sparse distributed computations, where the assumption that all processes behave (almost) symmetrically is justified.

In this paper, we show how efficient message-combining communication schedules for isomorphic, sparse collective communication can easily and efficiently be computed by purely local computations. We give schemes for \emph{isomorphic \alltoall} and \emph{\allgather} communication that reduce the number of communication rounds and thereby the communication latency from $s$ to at most $Nd$, for neighborhoods consisting of $s$ processes with the (small) factor $N$ depending on the structure of the neighborhood and the capabilities of the communication system. Using these schedules, we give \emph{zero-copy implementations} of the isomorphic collectives using MPI and its derived datatypes to eliminate explicit, process-local copy operations. By benchmarking the collective communication algorithms against straightforward implementations and against the corresponding MPI neighborhood collectives, we document significant latency improvements of our implementations for block sizes of up to a few kilobytes. We discuss further optimizations for computing even better schedules, some of which have been implemented and benchmarked.

#### Caching Strategies for Information Centric Networking: Opportunities and Challenges. (arXiv:1606.07630v1 [cs.NI])

Internet usage has shifted from host-centric end-to-end communication to a content-centric approach mainly used for content delivery. Information Centric Networking (ICN) was proposed as a promising novel content delivery architecture. ICN includes in-network caching features at every node which has a major impact on content retrieval. For instance, the ICN efficiency depends drastically on the management of the caching resources. The management of caching resources is achieved with caching strategies. Caching strategies decide what, where and when content is stored in the network. In this paper, we revisit the recent technical contributions on caching strategies, we compare them and discuss opportunities and challenges for improving content delivery in the upcoming years.

#### Benchmarking Distributed Stream Processing Platforms for IoT Applications. (arXiv:1606.07621v1 [cs.DC])

Internet of Things (IoT) is a technology paradigm where millions of sensors monitor, and help inform or manage, physical, envi- ronmental and human systems in real-time. The inherent closed-loop re- sponsiveness and decision making of IoT applications makes them ideal candidates for using low latency and scalable stream processing plat- forms. Distributed Stream Processing Systems (DSPS) are becoming es- sential components of any IoT stack, but the efficacy and performance of contemporary DSPS have not been rigorously studied for IoT data streams and applications. Here, we develop a benchmark suite and per- formance metrics to evaluate DSPS for streaming IoT applications. The benchmark includes 13 common IoT tasks classified across various func- tional categories and forming micro-benchmarks, and two IoT applica- tions for statistical summarization and predictive analytics that leverage various dataflow compositional features of DSPS. These are coupled with stream workloads sourced from real IoT observations from smart cities. We validate the IoT benchmark for the popular Apache Storm DSPS, and present empirical results.

Time-to-Live data in the IP header offers two interesting characteristics: First, different IP stacks pick different start TTL values. Second, each traversed router should decrement the TTL value. The combination of both offers host and route fingerprinting options. We present the first work to investigate Internet-wide TTL behavior at carrier scale and evaluate its fit to detect anomalies, predominantly spoofed source IP addresses. Using purpose-built software, we capture 2 weeks of raw TTL data at a 40 Gbit/s Internet uplink. For further insight, we actively measure observed hosts and conduct large-scale hitlist-based measurements, which yields three complementary data sets for IPv4 and IPv6. A majority (69% IPv4; 81% IPv6) of passively observed multi-packet hosts exhibit one stable TTL value. Active measurements on unstable hosts yield a stable anchor TTL value for more than 85% of responsive hosts. We develop a structure to further classify unstable hosts taking, for example, temporal stability into account. Correlation of TTL values with BGP data is clear, yet unpredictive. The results indicate that carrier-grade TTL anomaly detection can yield significant insights in the following categories: First, the method can flag anomalies based on TTL observations (yet likely at a difficult false positive/false negative trade-off). Second, the method can establish trust that a packet originates from its acclaimed source.

#### N-queens-based algorithm for moving object detection in distributed wireless sensor networks. (arXiv:1606.07583v1 [cs.MM])

The main constraint of wireless sensor networks (WSN) in enabling wireless image communication is the high energy requirement, which may exceed even the future capabilities of battery technologies. In this paper we have shown that this bottleneck can be overcome by developing local in-network image processing algorithm that offers optimal energy consumption. Our algorithm is very suitable for intruder detection applications. Each node is responsible for processing the image captured by the video sensor, which consists of NxN blocks. If an intruder is detected in the monitoring region, the node will transmit the image for further processing. Otherwise, the node takes no action. Results provided from our experiments show that our algorithm is better than the traditional moving object detection techniques by a factor of (N/2) in terms of energy savings.

#### Detecting New and Arbitrary Relations among Linked Data Entities using Pattern Extraction. (arXiv:1606.07572v1 [cs.DB])

Although several RDF knowledge bases are available through the LOD initiative, often these data sources remain isolated, lacking schemata and links to other datasets. While there are numerous works that focus on establishing that two resources are identical and on adding more instances of an already existing relation, the problem of finding new relations between any two given datasets has not been investigated in detail. In this paper, given two entity sets, we present an unsupervised approach to enrich the LOD cloud with new relations between them by exploiting the web corpus. During the first phase we gather prospective relations from the corpus through pattern extraction and paraphrase detection. In the second phase, we perform actual enrichment by extracting instances of these relations. We have empirically evaluated our approach on several dataset pairs and found that the system can indeed be used for enriching the existing datasets with new relations.

#### Dynamic Witnesses for Static Type Errors. (arXiv:1606.07557v1 [cs.PL])

Static type errors are a common stumbling block for newcomers to typed functional languages. We present a dynamic approach to explaining type errors by generating counterexample witness inputs that illustrate how an ill-typed program goes wrong. First, given an ill-typed function, we symbolically execute the body to dynamically synthesize witness values that can make the program go wrong. We prove that our procedure synthesizes general witnesses in that if a witness is found, then for all inhabited input types, there exist values that can make the function go wrong. Second, we show how to extend the above procedure to produce a reduction graph that can be used to interactively visualize and debug witness executions. Third, we evaluate the coverage of our approach on two data sets comprising over 4,500 ill-typed student programs. Our technique is able to generate witnesses for 88% of the programs, and our reduction graph yields small counterexamples for 81% of the witnesses. Finally, we evaluate the utility of our witnesses in helping students understand and fix type errors, and find that students presented with our witnesses consistently show a greater understanding of type errors than those presented with a standard error message.

#### SNAP: A General Purpose Network Analysis and Graph Mining Library. (arXiv:1606.07550v1 [cs.SI])

Large networks are becoming a widely used abstraction for studying complex systems in a broad set of disciplines, ranging from social network analysis to molecular biology and neuroscience. Despite an increasing need to analyze and manipulate large networks, only a limited number of tools are available for this task.

Here, we describe Stanford Network Analysis Platform (SNAP), a general-purpose, high-performance system that provides easy to use, high-level operations for analysis and manipulation of large networks. We present SNAP functionality, describe its implementational details, and give performance benchmarks. SNAP has been developed for single big-memory machines and it balances the trade-off between maximum performance, compact in-memory graph representation, and the ability to handle dynamic graphs where nodes and edges are being added or removed over time. SNAP can process massive networks with hundreds of millions of nodes and billions of edges. SNAP offers over 140 different graph algorithms that can efficiently manipulate large graphs, calculate structural properties, generate regular and random graphs, and handle attributes and meta-data on nodes and edges. Besides being able to handle large graphs, an additional strength of SNAP is that networks and their attributes are fully dynamic, they can be modified during the computation at low cost. SNAP is provided as an open source library in C++ as well as a module in Python.

We also describe the Stanford Large Network Dataset, a set of social and information real-world networks and datasets, which we make publicly available. The collection is a complementary resource to our SNAP software and is widely used for development and benchmarking of graph analytics algorithms.

#### Translucent Players: Explaining Cooperative Behavior in Social Dilemmas. (arXiv:1606.07533v1 [cs.GT])

In the last few decades, numerous experiments have shown that humans do not always behave so as to maximize their material payoff. Cooperative behavior when non-cooperation is a dominant strategy (with respect to the material payoffs) is particularly puzzling. Here we propose a novel approach to explain cooperation, assuming what Halpern and Pass call translucent players. Typically, players are assumed to be opaque, in the sense that a deviation by one player in a normal-form game does not affect the strategies used by other players. But a player may believe that if he switches from one strategy to another, the fact that he chooses to switch may be visible to the other players. For example, if he chooses to defect in Prisoner's Dilemma, the other player may sense his guilt. We show that by assuming translucent players, we can recover many of the regularities observed in human behavior in well-studied games such as Prisoner's Dilemma, Traveler's Dilemma, Bertrand Competition, and the Public Goods game.

#### Announcement as effort on topological spaces. (arXiv:1606.07527v1 [cs.LO])

We propose a multi-agent logic of knowledge, public and arbitrary announcements, that is interpreted on topological spaces in the style of subset space semantics. The arbitrary announcement modality functions similarly to the effort modality in subset space logics, however, it comes with intuitive and semantic differences. We provide axiomatizations for three logics based on this setting, and demonstrate their completeness.

#### Parameterized Complexity Results for a Model of Theory of Mind Based on Dynamic Epistemic Logic. (arXiv:1606.07526v1 [cs.LO])

In this paper we introduce a computational-level model of theory of mind (ToM) based on dynamic epistemic logic (DEL), and we analyze its computational complexity. The model is a special case of DEL model checking. We provide a parameterized complexity analysis, considering several aspects of DEL (e.g., number of agents, size of preconditions, etc.) as parameters. We show that model checking for DEL is PSPACE-hard, also when restricted to single-pointed models and S5 relations, thereby solving an open problem in the literature. Our approach is aimed at formalizing current intractability claims in the cognitive science literature regarding computational models of ToM.

#### Relating Knowledge and Coordinated Action: The Knowledge of Preconditions Principle. (arXiv:1606.07525v1 [cs.MA])

The Knowledge of Preconditions principle (KoP) is proposed as a widely applicable connection between knowledge and action in multi-agent systems. Roughly speaking, it asserts that if some condition is a necessary condition for performing a given action A, then knowing that this condition holds is also a necessary condition for performing A. Since the specifications of tasks often involve necessary conditions for actions, the KoP principle shows that such specifications induce knowledge preconditions for the actions. Distributed protocols or multi-agent plans that satisfy the specifications must ensure that this knowledge be attained, and that it is detected by the agents as a condition for action. The knowledge of preconditions principle is formalised in the runs and systems framework, and is proven to hold in a wide class of settings. Well-known connections between knowledge and coordinated action are extended and shown to derive directly from the KoP principle: a "common knowledge of preconditions" principle is established showing that common knowledge is a necessary condition for performing simultaneous actions, and a "nested knowledge of preconditions" principle is proven, showing that coordinating actions to be performed in linear temporal order requires a corresponding form of nested knowledge.

#### Preference at First Sight. (arXiv:1606.07524v1 [cs.LO])

We consider decision-making and game scenarios in which an agent is limited by his/her computational ability to foresee all the available moves towards the future - that is, we study scenarios with short sight. We focus on how short sight affects the logical properties of decision making in multi-agent settings. We start with single-agent sequential decision making (SSDM) processes, modeling them by a new structure of "preference-sight trees". Using this model, we first explore the relation between a new natural solution concept of Sight-Compatible Backward Induction (SCBI) and the histories produced by classical Backward Induction (BI). In particular, we find necessary and sufficient conditions for the two analyses to be equivalent. Next, we study whether larger sight always contributes to better outcomes. Then we develop a simple logical special-purpose language to formally express some key properties of our preference-sight models. Lastly, we show how short-sight SSDM scenarios call for substantial enrichments of existing fixed-point logics that have been developed for the classical BI solution concept. We also discuss changes in earlier modal logics expressing "surface reasoning" about best actions in the presence of short sight. Our analysis may point the way to logical and computational analysis of more realistic game models.

#### An Axiomatic Approach to Routing. (arXiv:1606.07523v1 [cs.GT])

Information delivery in a network of agents is a key issue for large, complex systems that need to do so in a predictable, efficient manner. The delivery of information in such multi-agent systems is typically implemented through routing protocols that determine how information flows through the network. Different routing protocols exist each with its own benefits, but it is generally unclear which properties can be successfully combined within a given algorithm. We approach this problem from the axiomatic point of view, i.e., we try to establish what are the properties we would seek to see in such a system, and examine the different properties which uniquely define common routing algorithms used today.

We examine several desirable properties, such as robustness, which ensures adding nodes and edges does not change the routing in a radical, unpredictable ways; and properties that depend on the operating environment, such as an "economic model", where nodes choose their paths based on the cost they are charged to pass information to the next node. We proceed to fully characterize minimal spanning tree, shortest path, and weakest link routing algorithms, showing a tight set of axioms for each.

#### Ceteris paribus logic in counterfactual reasoning. (arXiv:1606.07522v1 [cs.LO])

The semantics for counterfactuals due to David Lewis has been challenged on the basis of unlikely, or impossible, events. Such events may skew a given similarity order in favour of those possible worlds which exhibit them. By updating the relational structure of a model according to a ceteris paribus clause one forces out, in a natural manner, those possible worlds which do not satisfy the requirements of the clause. We develop a ceteris paribus logic for counterfactual reasoning capable of performing such actions, and offer several alternative (relaxed) interpretations of ceteris paribus. We apply this framework in a way which allows us to reason counterfactually without having our similarity order skewed by unlikely events. This continues the investigation of formal ceteris paribus reasoning, which has previously been applied to preferences, logics of game forms, and questions in decision-making, among other areas.

#### Do players reason by forward induction in dynamic perfect information games?. (arXiv:1606.07521v1 [cs.GT])

We conducted an experiment where participants played a perfect-information game against a computer, which was programmed to deviate often from its backward induction strategy right at the beginning of the game. Participants knew that in each game, the computer was nevertheless optimizing against some belief about the participant's future strategy.

It turned out that in the aggregate, participants were likely to respond in a way which is optimal with respect to their best-rationalization extensive form rationalizability conjecture - namely the conjecture that the computer is after a larger prize than the one it has foregone, even when this necessarily meant that the computer has attributed future irrationality to the participant when the computer made the first move in the game. Thus, it appeared that participants applied forward induction. However, there exist alternative explanations for the choices of most participants; for example, choices could be based on the extent of risk aversion that participants attributed to the computer in the remainder of the game, rather than to the sunk outside option that the computer has already foregone at the beginning of the game. For this reason, the results of the experiment do not yet provide conclusive evidence for Forward Induction reasoning on the part of the participants.

#### Standard State Space Models of Unawareness (Extended Abstract). (arXiv:1606.07520v1 [cs.AI])

The impossibility theorem of Dekel, Lipman and Rustichini has been thought to demonstrate that standard state-space models cannot be used to represent unawareness. We first show that Dekel, Lipman and Rustichini do not establish this claim. We then distinguish three notions of awareness, and argue that although one of them may not be adequately modeled using standard state spaces, there is no reason to think that standard state spaces cannot provide models of the other two notions. In fact, standard space models of these forms of awareness are attractively simple. They allow us to prove completeness and decidability results with ease, to carry over standard techniques from decision theory, and to add propositional quantifiers straightforwardly.

#### Bayesian Games with Intentions. (arXiv:1606.07519v1 [cs.GT])

We show that standard Bayesian games cannot represent the full spectrum of belief-dependent preferences. However, by introducing a fundamental distinction between intended and actual strategies, we remove this limitation. We define Bayesian games with intentions, generalizing both Bayesian games and psychological games, and prove that Nash equilibria in psychological games correspond to a special class of equilibria as defined in our setting.

#### On the Solvability of Inductive Problems: A Study in Epistemic Topology. (arXiv:1606.07518v1 [cs.LO])

We investigate the issues of inductive problem-solving and learning by doxastic agents. We provide topological characterizations of solvability and learnability, and we use them to prove that AGM-style belief revision is "universal", i.e., that every solvable problem is solvable by AGM conditioning.

#### Coordination Games on Directed Graphs. (arXiv:1606.07517v1 [cs.GT])

We study natural strategic games on directed graphs, which capture the idea of coordination in the absence of globally common strategies. We show that these games do not need to have a pure Nash equilibrium and that the problem of determining their existence is NP-complete. The same holds for strong equilibria. We also exhibit some classes of games for which strong equilibria exist and prove that a strong equilibrium can then be found in linear time.

#### Epistemic Protocols for Distributed Gossiping. (arXiv:1606.07516v1 [cs.AI])

Gossip protocols aim at arriving, by means of point-to-point or group communications, at a situation in which all the agents know each other's secrets. We consider distributed gossip protocols which are expressed by means of epistemic logic. We provide an operational semantics of such protocols and set up an appropriate framework to argue about their correctness. Then we analyze specific protocols for complete graphs and for directed rings.

#### Resolving Distributed Knowledge. (arXiv:1606.07515v1 [cs.LO])

Distributed knowledge is the sum of the knowledge in a group; what someone who is able to discern between two possible worlds whenever any member of the group can discern between them, would know. Sometimes distributed knowledge is referred to as the potential knowledge of a group, or the joint knowledge they could obtain if they had unlimited means of communication. In epistemic logic, the formula D_G{\phi} is intended to express the fact that group G has distributed knowledge of {\phi}, that there is enough information in the group to infer {\phi}. But this is not the same as reasoning about what happens if the members of the group share their information. In this paper we introduce an operator R_G, such that R_G{\phi} means that {\phi} is true after G have shared all their information with each other - after G's distributed knowledge has been resolved. The R_G operators are called resolution operators. Semantically, we say that an expression R_G{\phi} is true iff {\phi} is true in what van Benthem [11, p. 249] calls (G's) communication core; the model update obtained by removing links to states for members of G that are not linked by all members of G. We study logics with different combinations of resolution operators and operators for common and distributed knowledge. Of particular interest is the relationship between distributed and common knowledge. The main results are sound and complete axiomatizations.

#### Human-Agent Decision-making: Combining Theory and Practice. (arXiv:1606.07514v1 [cs.AI])

Extensive work has been conducted both in game theory and logic to model strategic interaction. An important question is whether we can use these theories to design agents for interacting with people? On the one hand, they provide a formal design specification for agent strategies. On the other hand, people do not necessarily adhere to playing in accordance with these strategies, and their behavior is affected by a multitude of social and psychological factors. In this paper we will consider the question of whether strategies implied by theories of strategic behavior can be used by automated agents that interact proficiently with people. We will focus on automated agents that we built that need to interact with people in two negotiation settings: bargaining and deliberation. For bargaining we will study game-theory based equilibrium agents and for argumentation we will discuss logic-based argumentation theory. We will also consider security games and persuasion games and will discuss the benefits of using equilibrium based agents.

#### The Problem of Analogical Inference in Inductive Logic. (arXiv:1606.07513v1 [cs.LO])

We consider one problem that was largely left open by Rudolf Carnap in his work on inductive logic, the problem of analogical inference. After discussing some previous attempts to solve this problem, we propose a new solution that is based on the ideas of Bruno de Finetti on probabilistic symmetries. We explain how our new inductive logic can be developed within the Carnapian paradigm of inductive logic-deriving an inductive rule from a set of simple postulates about the observational process-and discuss some of its properties.

#### Cluster-based MDS Algorithm for Nodes Localization in Wireless Sensor Networks with Irregular Topologies. (arXiv:1606.07506v1 [cs.DC])

Nodes localization in Wireless Sensor Networks (WSN) has arisen as a very challenging problem in the research community. Most of the applications for WSN are not useful without a priori known nodes positions. One solution to the problem is by adding GPS receivers to each node. Since this is an expensive approach and inapplicable for indoor environments, we need to find an alternative intelligent mechanism for determining nodes location. In this paper, we propose our cluster-based approach of multidimensional scaling (MDS) technique. Our initial experiments show that our algorithm outperforms MDS-MAP[8], particularly for irregular topologies in terms of accuracy.

#### Wireless Sensor Networks Localization Methods: Multidimensional Scaling vs. Semidefinite Programming Approach. (arXiv:1606.07502v1 [cs.DC])

With the recent development of technology, wireless sensor networks are becoming an important part of many applications such as health and medical applications, military applications, agriculture monitoring, home and office applications, environmental monitoring, etc. Knowing the location of a sensor is important, but GPS receivers and sophisticated sensors are too expensive and require processing power. Therefore, the wireless sensor network localization problem is a growing field of interest. The aim of this paper is to give a comparison of wireless sensor network localization methods, and therefore, multidimensional scaling and semidefinite programming are chosen for this research. Multidimensional scaling is a simple, widely-discussed mathematical technique that solves the wireless sensor network localization problem. In contrast, semidefinite programming is a relatively new field of optimization with a growing use, although being more complex. In this paper, using extensive simulations, a detailed overview of these two approaches is given, regarding different network topologies, various network parameters and performance issues. The performances of both techniques are highly satisfactory and estimation errors are minimal.

#### Enhancing Accountability and Trust in Distributed Ledgers. (arXiv:1606.07490v1 [cs.DC])

Permissionless decentralized ledgers ("blockchains") such as the one underlying the cryptocurrency Bitcoin allow anonymous participants to maintain the ledger, while avoiding control or "censorship" by any single entity. In contrast, permissioned decentralized ledgers exploit real-world trust and accountability, allowing only explicitly authorized parties to maintain the ledger. Permissioned ledgers support more flexible governance and a wider choice of consensus mechanisms. Both kinds of decentralized ledgers may be susceptible to manipulation by participants who favor some transactions over others. The real-world accountability underlying permissioned ledgers provides an opportunity to impose fairness constraints that can be enforced by penalizing violators after-the-fact. To date, however, this opportunity has not been fully exploited, unnecessarily leaving participants latitude to manipulate outcomes undetectably. This paper draws attention to this issue, and proposes design principles to make such manipulation more difficult, as well as specific mechanisms to make it easier to detect when violations occur.

#### Performance Analysis and Scaling Law of MRC/MRT Relaying with CSI Error in Massive MIMO Systems. (arXiv:1606.07480v1 [cs.NI])

This work provides a comprehensive scaling law and performance analysis for multi-user massive MIMO relay networks, where the relay is equipped with massive antennas and uses MRC/MRT for low-complexity processing. CSI error is considered. First, a sum-rate lower bound is derived which manifests the effect of system parameters including the numbers of relay antennas and users, the CSI quality, and the transmit powers of the sources and the relay. Via a general scaling model on the system parameters with respect to the relay antenna number, the asymptotic scaling law of the SINR as a function of the parameter scalings is obtained, which shows quantitatively the tradeoff between the network parameters and their effect on the network performance. In addition, a sufficient condition on the parameter scalings for the SINR to be asymptotically deterministic is given, which covers existing studies on such analysis as special cases. Then, the scenario where the SINR increases linearly with the relay antenna number is studied. The sufficient and necessary condition on the parameter scaling for this scenario is proved. It is shown that in this case, the interference power is not asymptotically deterministic, then its distribution is derived, based on which the outage probability and average bit error rate of the relay network are analysed.

#### Wireless Application Protocol Architecture Overview. (arXiv:1606.07479v1 [cs.NI])

WAP (Wireless Application Protocol) is a set of communication protocols and an application environment for wireless devices. The WAP model comprises more than a WAP gateway, and it is designed to be suitable for wireless communication environments.

### Planet Theory

#### Efficient computation of middle levels Gray codes

Authors: Torsten Mütze, Jerri Nummenpalo
Abstract: For any integer $n\geq 1$ a middle levels Gray code is a cyclic listing of all bitstrings of length $2n+1$ that have either $n$ or $n+1$ entries equal to 1 such that any two consecutive bitstrings in the list differ in exactly one bit. The question whether such a Gray code exists for every $n\geq 1$ has been the subject of intensive research during the last 30 years, and has been answered affirmatively only recently [T. M\"utze. Proof of the middle levels conjecture. arXiv:1404.4442, 2014]. In this work we provide the first efficient algorithm to compute a middle levels Gray code. For a given bitstring, our algorithm computes the next $\ell$ bitstrings in the Gray code in time $\mathcal{O}(n\ell(1+\frac{n}{\ell}))$, which is $\mathcal{O}(n)$ on average per bitstring provided that $\ell=\Omega(n)$.

#### Efficient Analog Circuits for Boolean Satisfiability

Authors: Xunzhao Yin, Behnam Sedighi, Melinda Varga, Maria Ercsey-Ravasz, Zoltan Toroczkai, Xiaobo Sharon Hu
Abstract: Efficient solutions to NP-complete problems would significantly benefit both science and industry. However, such problems are intractable on digital computers based on the von Neumann architecture, thus creating the need for alternative solutions to tackle such problems. Recently, a deterministic, continuous-time dynamical system (CTDS) was proposed (Nature Physics, 7(12), 966 (2011)) to solve a representative NP-complete problem, Boolean Satisfiability (SAT). This solver shows polynomial analog time-complexity on even the hardest benchmark $k$-SAT ($k \geq 3$) formulas, but at an energy cost through exponentially driven auxiliary variables. With some modifications to the CTDS equations, here we present a novel analog hardware SAT solver, AC-SAT, implementing the CTDS. AC-SAT is intended to be used as a co-processor, and with its modular design can be readily extended to different problem sizes. The circuit is designed and simulated based on a 32nm CMOS technology. SPICE simulation results show speedup factors of $\sim$10$^4$ on even the hardest 3-SAT problems, when compared with a state-of-the-art SAT solver on digital computers. For example, for hard problems with $N=50$ variables and $M=212$ clauses, solutions are found within from a few $ns$ to a few hundred $ns$ with an average power consumption of ${130}$ $mW$.

#### A Dynamic Epistemic Framework for Conformant Planning

Authors: Quan Yu, Yanjun Li, Yanjing Wang
Abstract: In this paper, we introduce a lightweight dynamic epistemic logical framework for automated planning under initial uncertainty. We reduce plan verification and conformant planning to model checking problems of our logic. We show that the model checking problem of the iteration-free fragment is PSPACE-complete. By using two non-standard (but equivalent) semantics, we give novel model checking algorithms to the full language and the iteration-free language.

### Fefe

#### Ein Einsender merkt an:So zum unvorbereitet sein: Ist ...

Ein Einsender merkt an:
So zum unvorbereitet sein: Ist so. Es gab offensichtlich keinen Plan & keine Strategie von UK-Seite; offensichtlich wurde auf EU-Seite durchaus zumindest kurz darüber nachgedacht. BTW - IT-technisch brennt da auch gerade ein bisschen die Luft - auch grosse Konzerne haben da gepennt. Denn wenn GB austritt & nicht absolut zeitgleich mittels z.B. EWR den Zugang zum Single Market schafft, dann gibts ein klitzekleines Problem mit dem Europäischen Datenschutz. Das erwischt einige, die auch 'Cloud' anbieten. Seit Freitag später Nachmittag gehts da rund - ich hatte das auch nicht auf dem Radar, bis die ersten Anfragen auftauchten. Ein paar Telefonate später war mir dann einiges etwas klarer.

### StackOverflow

#### How to interpret trees from random forest via python

I'm trying to figure out how I can go about interpreting my trees from my random forest. My data contains around 29,000 observations and 35 features. I pasted the first 22 observations, the first 11 features as well as the feature that I am trying to predict(HighLowMobility).

birthcohort countyfipscode  county_name cty_pop2000 statename   state_id    stateabbrv  perm_res_p25_kr24   perm_res_p75_kr24   perm_res_p25_c1823  perm_res_p75_c1823  HighLowMobility
1980    1001    Autauga 43671   Alabama 1   AL  45.2994 60.7061         Low
1981    1001    Autauga 43671   Alabama 1   AL  42.6184 63.2107 29.7232 75.266  Low
1982    1001    Autauga 43671   Alabama 1   AL  48.2699 62.3438 38.0642 72.2544 Low
1983    1001    Autauga 43671   Alabama 1   AL  42.6337 56.4204 38.2588 80.4664 Low
1984    1001    Autauga 43671   Alabama 1   AL  44.0163 62.2799 38.1238 73.747  Low
1985    1001    Autauga 43671   Alabama 1   AL  45.7178 61.3187 40.9339 83.0661 Low
1986    1001    Autauga 43671   Alabama 1   AL  47.9204 59.6553 47.4841 72.491  Low
1987    1001    Autauga 43671   Alabama 1   AL  48.3108 54.042  53.199  84.5379 Low
1988    1001    Autauga 43671   Alabama 1   AL  47.9855 59.42   52.8927 85.2844 Low
1980    1003    Baldwin 140415  Alabama 1   AL  42.4611 51.4142         Low
1981    1003    Baldwin 140415  Alabama 1   AL  43.0029 55.1014 35.5923 76.9857 Low
1982    1003    Baldwin 140415  Alabama 1   AL  46.2496 56.0045 38.679  77.038  Low
1983    1003    Baldwin 140415  Alabama 1   AL  44.3001 54.5173 38.7106 81.0388 Low
1984    1003    Baldwin 140415  Alabama 1   AL  46.4349 55.5245 42.4422 80.3047 Low
1985    1003    Baldwin 140415  Alabama 1   AL  47.1544 52.8189 42.7994 79.0835 Low
1986    1003    Baldwin 140415  Alabama 1   AL  47.553  54.934  42.0653 78.4398 Low
1987    1003    Baldwin 140415  Alabama 1   AL  48.9752 54.3541 39.96   79.4915 Low
1988    1003    Baldwin 140415  Alabama 1   AL  48.6887 55.3087 43.8557 79.387  Low
1980    1005    Barbour 29038   Alabama 1   AL                  Low
1981    1005    Barbour 29038   Alabama 1   AL  37.5338 54.3618 34.8771 75.1904 Low
1982    1005    Barbour 29038   Alabama 1   AL  37.028  57.2471 36.5392 90.3262 Low
1983    1005    Barbour 29038   Alabama 1   AL                  Low

Here is my random forest:

# Impute missing values with each column's median.
# NOTE(review): X still contains string columns here ('county_name', etc.);
# DataFrame.median() on mixed dtypes may fail or silently skip those columns
# depending on the pandas version — confirm against the version in use.
X = X.fillna(X.median())

# Drop the categorical (string) columns before modeling.
X = X.drop(['county_name','statename','stateabbrv'],axis=1)

# Collect the target labels ('High'/'Low') in y before removing them from X.
y = X['HighLowMobility']

# Remove the target column so X holds only the input features.
X = X.drop(['HighLowMobility'],axis=1)

# Imported but replaced below by the hand-rolled preprocess_labels().
from sklearn.preprocessing import LabelEncoder

#Encoding the output labels
def preprocess_labels(y):
    """Encode string class labels as integers.

    'Low' maps to 0; every other label (including 'High') maps to 1.

    Parameters
    ----------
    y : iterable of labels (list, array, or pandas Series)

    Returns
    -------
    list of int : one 0/1 code per input label, in order.
    """
    # The pasted original had lost its indentation (a SyntaxError as shown)
    # and looped via range(len(y)); iterate values directly instead.
    yp = []
    for label in y:
        if str(label) == 'Low':
            yp.append(0)
        else:
            # 'High' and any unexpected label both become 1 — this preserves
            # the original's elif/else behavior, where both branches appended 1.
            yp.append(1)
    return yp

#y = LabelEncoder().fit_transform(y)
# Encode labels with the hand-rolled mapper above ('Low' -> 0, else 1).
yp = preprocess_labels(y)
yp = np.array(yp)
# Bare expressions — only useful as output in an interactive session;
# they have no effect when run as a script.
yp.shape
X.shape
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# modern versions import train_test_split from sklearn.model_selection.
from sklearn.cross_validation import train_test_split
# Hold out 25% of the rows for testing, with a fixed seed for reproducibility.
X_train, X_test,y_train, y_test = train_test_split(X,yp,test_size=0.25, random_state=42)
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)
# Feature/label pairs — built here but never used below.
training_data = X_train,y_train
test_data = X_test,y_test
# Number of input features; sizes the network's input layer below.
dims = X_train.shape[1]
# NOTE(review): the paste lost its indentation — the lines below were
# presumably the body of this __main__ guard; as written this is a
# SyntaxError. Neural_Network is a project-local class (definition not shown),
# so its constructor arguments cannot be documented from here.
if __name__ == '__main__':
nn = Neural_Network([dims,10,5,1], learning_rate=1, C=1, opt=False, check_gradients=True, batch_size=200, epochs=100)
nn.fit(X_train,y_train)
weights = nn.final_weights()
testlabels_out = nn.predict(X_test)
# Python 2 print statements — this snippet predates Python 3's print().
print testlabels_out
print "Neural Net Accuracy is " + str(np.round(nn.score(X_test,y_test),2))

'''
RANDOM FOREST AND LOGISTIC REGRESSION

Compare the two classifiers via 5-fold cross-validated accuracy on the
full feature matrix X and the raw string labels y.
'''
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# on modern versions use sklearn.model_selection instead.
from sklearn import cross_validation
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

clf1 = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None)
# min_samples_split must be >= 2 on current scikit-learn (the original value
# of 1 raises ValueError); a node with a single sample cannot split anyway,
# so 2 is behaviorally identical.
clf2 = RandomForestClassifier(n_estimators=100, max_depth=None, min_samples_split=2, random_state=0)
for clf, label in zip([clf1, clf2], ['Logistic Regression', 'Random Forest']):
    # Indentation restored — the pasted original had the loop body flush
    # left, which is a SyntaxError.
    # NOTE(review): this scores against the string labels y rather than the
    # encoded yp used above — sklearn accepts string class labels, but confirm
    # this is intentional.
    scores = cross_validation.cross_val_score(clf, X, y, cv=5, scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))

How would I interpret my trees? For example, perm_res_p25_c1823 is a feature that states the college attendance at ages 18-23 for a child born at the 25th percentile, perm_res_p75_c1823 represents the 75th percentile, and the HighLowMobility feature states whether there is High or Low upward income mobility. So how would I show the following: "If the person comes from the 25th percentile and lives in Autauga, Alabama, then they will probably have lower upward mobility"?

#### Add external library into current c++ project

I'm trying to use the xgboost library to basically translate a model I developed in R to C++. I've not used an external library and other sources online did not help me much so decided to ask the question here to hopefully find the way to do it.

Here's the library I need to use in my C++ code: https://github.com/dmlc/xgboost

I'm using Visual Studio Ultimate 2013. I have a make file that I use to define my project.

I am looking for the proper way to use this library in my C++ code. If you have ever used this library before, I really appreciate your help. Any hint will be appreciated.

### QuantOverflow

#### What is the difference between OIS Swap vs Basis Swap?

What is the use of OIS Swap Curve vs. Basis Swap Curve?

### TheoryOverflow

#### Mapping from NP-Complete to Probability-theory problems

Hi I'm a computer science student and i have an exam in "Introduction to Probability" (Computer Science)

Now i was wondering if instead of studying specifically for the exam material i can map it to a different NP-Complete problem that it will be easier for me to solve .. Like Clique or Vertex-Cover for example ?

Is this task an NP problem at all?

I'm thinking that if this problem is something we can even "mathematically" define, wouldn't doing so make it an NP problem? Maybe I can reduce some amount of problems from one to another?

### CompsciOverflow

#### Planning interviews

This is real-world problem, but I need to model it with algorithm as I am going to implement it (probably with PostGIS and Google Maps).

### Problem is:

Every day I am receiving job offers, and I have to schedule interviews with them. I have already noticed that there are two "hot" areas where there is a large number of offices, and around $30-40\%$ of all interviews are taking place in those two spots. I want to go to all interviews; however, I want to start working at the end of this month, and this means I won't be able to meet all employers if I don't cluster them.

### My modeling approach:

The most simple version of this is of course Vehicle routing problem however, in real life I am not getting all $100$ offers with location at the same time, but rather smaller amount ($5-10$) every day, and at the same time so I cannot wait to gather all of them to start computing. This means that I need greedy solution. Additional constraint is that not every recruiter will accept any hour. It can be assumed, that I can do interviewing from $10:00$ to $19:00$, one interview takes $1$ hour, last interview can start at $18:00$. Given $2$ points, I am able (using Google Maps) to tell distance between them (counted in minutes). They can be broken down into three abstraction classes - near (0-15 minutes), medium (16-30), far (31-45). I am not accepting interviews farther than $45$ minutes away. Being late less than $10$ minutes, rescheduling meeting at most one time and offering recruiter less than four dates are constraints for professionalism, and can be exploited, only if this will allow me to schedule additional interview (being late $10$ minutes implies fact, that interview will end $10$ minutes later than expected). Interviews are taking place from Monday to Friday. It is also possible to hold response to offer for $24$ hours. From my empiric experiment, expected time to receive job offer by email is $10:00$ each day (most at Mondays), with standard deviation of $1$ hour.

### Current algorithm:

I am pessimistically assuming that I will be 15 minutes late for every interview and that every two locations are at maximum distance, that is, $45$ minutes. In addition to the $1$ hour of interview, this gives two full hours per interview. Considering hours, each day is broken down into five free spots for interviews, starting at $10$, $12$, $14$, $16$ and $18$. I am maintaining a list of all open spots for all days in the week (five working days) and I am sending the full list to each and every recruiter. The first recruiter that accepts a free spot is assigned to it, and I am marking the spot as occupied in Google Calendar, and telling every next recruiter that he needs to pick another date. When I see that a recruiter wants to meet somewhere near (0-$15$ minutes) another location, and at the same time the next spot is open, I am suggesting this spot. If he accepts, I am appending him there, and then shifting all starting hours after him by plus 1 hour. If he does not accept, I am offering all available dates. As you can see, this algorithm guarantees me $5$ days times $5$ spots, $25$ interviews each week, plus around $2-3$ more from clustering near interviews. I need at least $30$ guaranteed spots to attend all meetings, and it would be great if it could be $35$, so I could finish them faster and be able to offer more dates to the last interviewers. I am also trying to reschedule interviews so that no two interviews, where the first is immediately after the second, are far apart; however, rescheduling is a transactional operation that usually takes around $24$ hours.

### Goal:

I am trying to figure out algorithm, that given current state of my Google Calendar (that includes start time and location of each interview) and set of buffered offers, will allow me to maximize amount of scheduled meetings. It can be assumed, that I am immediately informed about each offer (I have mobile Internet). I have same problem each year (as a full-time student, I am looking for job at the start of holidays and dropping it at the end of holidays), and as I suspect to study 5 more years, I need efficient solution.

### Fefe

#### Kurzer Aufruf an die Leser: Im Moment sind paradiesische ...

Kurzer Aufruf an die Leser: Im Moment sind paradiesische Zeiten für Burying. Wir haben Brexit, Fußball und auch noch Unwetter. Man muss schon massiv runterscrollen, um zu anderen Nachrichten zu kommen.

Wenn die Politik also Altöl zu verklappen hat, dann wäre jetzt der Zeitpunkt.

Achtet bitte mal darauf, was gerade so an euch vorbei scrollt.

Interessante Kandidaten wären:

• Unangenehme Reports ala "das eine Gesetz, das wir nochmal evaluieren wollten, war nicht nur wirkungslos sondern sogar kontraproduktiv", z.B. zum Arbeitsmarkt oder der Rentenpolitik oder so.
• Peinliche Ereignisse wie "die Mautdaten wurden gehackt"
• Neue stinkende Gesetzesentwürfe, entweder wegen "Geheimdienst- und/oder Polizeibefugnisse ausgeweitet" oder wegen "offensichtliche Klientelpolitik, wir wurden von den Hotelier geschmiert".
• Generell übelriechende Daten, aus denen sich ergeben würde, dass die Politik an einer Stelle massivst versagt hat oder sich hat schmieren lassen.
Wenn euch was auffällt: Immer her damit!

## June 26, 2016

### QuantOverflow

#### Explain Four Basic Axioms of Maximising Expected Utility

I have begun learning PRM. Could someone help me understand the Four Basic Axioms of Maximising Expected Utility in the most intuitive way? Thank you very much.

### StackOverflow

#### Maxout neurons: are the weights in the maxout function referring to 2 unique sets of weights?

I don't understand how maxout works and I suspect it's due to my visualization of the linear algebra multiplication. Basically, I'm under the impression that there are two sets of weights for the maxout functions, both individually trained and then only one is selected. But I'm suspecting this may be wrong, since I don't see a way that two different weights can be trained simultaneously in one feed forward run of the network.

Also, if the two weights w1 and w2 in the function does not refer to two unique sets of weights, then could there be more than two arguments being input to the maxout function, and of which only the max is chosen?

Here is the maxout function I read:

max((w1.T.dot(X) + b1), (w2.T.dot(X) + b2))

Is there a mental representation I could use to visualize this better?

#### Predict Chart Growth [on hold]

Lets say I have 500 stock charts: They show the value of a share for a specific date:

With Machine Learning I would like to predict the value for an arbitrary stock on the next date.

I thought I could train my model by looking on the growth rates of the stock values. Lets say I have a stock that costs:

Day1: 100 Day2: 95 Day3: 110 Day4: 110 Day5: 105 Day6: 110  NextDate: My prediction

Then I would look on the growth rates:

Day1: 0% Day2: -5% Day3: 15.78% Day4: 0% Day5: -4.54% Day6: 4.76%  NextDate: My prediction

And I would train my model by always looking at 3 consecutive dates and using the fourth as a label. That would be:

X: [0, -5, 15.78]  Y: 0
X: [-5, 15.78, 0]  Y: -4.54
X: [15.78, 0, -4.54] Y: 4.76

Then to predict the value on the next date I would input:

next_date_growth = model.predict([0, -4.54, 4.76])

I am a beginner in machine learning; how would you predict stock prices? And what sklearn algorithm should I use? Thanks

### Planet Theory

#### Assistant Professor in Computer Science at University of Warwick, UK (apply by July 8, 2016)

This is an opportunity to join one of the UK’s most prominent research-active CS departments.
Enthusiastic individuals are sought to make a major contribution to the Department’s activities in research and teaching.
You will have an outstanding academic record, and show clear promise of developing a career in academia. You will have a strong commitment to undergraduate and postgraduate ed

Email: G.Cormode@warwick.ac.uk

### CompsciOverflow

#### A modified version of two way DFA

Following an exercise from Hopcroft-Ullman's Introduction to automata theory:

Let's define $k-MDFA$ as two way deterministic finite automaton with $k$ markers, similarly to two way $DFA$ but with ability to place (or pick up) a marker on the input cell currently under the head, but with limitation to placing at most $k$ markers at any time during computation, and with single cell only being able to hold at most 1 marker at any given time.

Formally, it's a tuple $A=(Q, \Sigma, \delta, s, F)$ where

• $Q$ is finite set of states
• $\Sigma$ is the input alphabet
• $\delta : Q \times (\Sigma \hspace{2pt}\cup \hspace{2pt} \{\vdash, \dashv\})\times \{0,1\} \rightarrow Q \times \{\textbf{L,R}\}\times \{0,1\}$ is the transition function
• $s$ is the initial state and $F$ is the set of accepting states

Input tape of the automaton for word $w \in \Sigma^*$ is $\vdash w \dashv$.

Moreover the transition function satisfies $\delta(q,\vdash,m) = (q', R, m')$, $\hspace{2pt}\delta(q, \dashv, m) = (q', L, m')$ for all $q \in Q, \hspace{2pt} m \in \{0,1\}$ and for some $q' \in Q$ and $m' \in \{0,1\}$ (we do not allow automaton to leave the input).

If automaton attempts to place more than $k$ markers on the tape, the computation stops with rejection.

I have proved that $1-MDFA$ recognize exactly regular sets, but I can also show that with $2-MDFA$ I can recognize language $\{ ww : w \in \Sigma^*\}$, which is not a context free language. The second fact is fairly simple - here's an outline of how such automaton would work:

1. Check the parity of the input's length
2. Locate the middle of input by placing markers at both ends of the word, and then moving them one step closer towards the middle alternatingly.
3. Place marker on the first cell of input and on first cell of second half of the input
4. Compare letters under markers, if they differ enter reject state, if they are same, move both markers 1 step to the left and repeat. If you are about to move right marker onto symbol $\dashv$ enter accept state.

As such, clearly languages recognized by $k-MDFA$ are not contained in $CFL$ for $k \geq 2$.

My questions are:

1. Does increasing the amount of markers increase the computation power of $k-MDFA$ for $k \geq 2$?
2. Does allowing nondeterminism increase computation power of $k-MDFA$?
3. Can $k-MDFA$ recognize all context-free languages? If so, what's the minimal $k$ capable of expressing $CFL$?

Any references are welcome, if the question shows ignorance in the subject, I apologise.

### QuantOverflow

#### How can I determine the value of equity linked security like this

Underlying assets are three global stock index : Eurostoxx 50, HSI, KOSPI 200

Maturity: 36 months with advanced redemption date in every 6 months if prices of indexes satisfy given conditions at each date.

Payoff : 4.5%(annually)

In the case where prices of each indexes at the maturity date are all less than 60% of the prices at the initial date, you get loss.

I want to use Monte Carlo Simulation using R.

I plan to determine the expected returns and co-variance matrix of three indexes from historical data and generate random numbers which follow multivariate normal distribution.

Using those random numbers, I will generate paths of each stock indexes and determine the total payoff.

However, determining the expected return or variance is hard because, lately, historical returns of some of indexes are not good, not even positive.

How can I determine that? Also, If you give me any idea of improving or implementing this simulation, I'd really appreciate that.

Thank you.

### StackOverflow

#### Is there a set that is complete with respect to operations on iterables? [on hold]

Recently I read an article with strong inclination to functional programming that suggested that there is a small number of methods that you basically need to operate on iterables (there were like five or six of them, I guess something like flatMap, map, filter, find, some, reduce). So basically instead of using usual declarative programming style one can solely use these methods and write functional code by chaining these methods.

So I was wondering if there is a way to prove that some set of operations on iterables is complete in the sense that they are sufficient to perform everything that the usual declarative for, while, etc. provide?

### CompsciOverflow

#### Amortised analysis of binary heap insert and delete-min

I'm trying to figure out how to do amortised analysis of heap insert and heap delete-min using potential function.

We can assume, that insert is O(logn) and delete-min is O(logn) too.

The goal is to prove, that amortised price of insert is O(logn) and amortised price of delete-min is O(1).

Can't figure out how to create a potential function.

POT.F = ?
INSERT c = logn + (something from 0 to xlogn)
DELETE-MIN c = logn + (something like -logn)

Could you guys help me? I've figured out so far that the difference between potentials should be from 0 to xlogn so the INSERT price would be logn, and for DELETE-MIN the difference should be somewhere around -logn to get a constant price.

### StackOverflow

#### What does immutable variable in Haskell mean?

I am quite confused with the concept of immutable variables in Haskell. It seems like that we can't change the value of variables in Haskell. But when I tried following code in GHCI, it seemed like the value of variables did change:

Prelude> foo x=x+1
Prelude> a=1
Prelude> a
1
Prelude> foo a
2
Prelude> a=2
Prelude> a
2
Prelude> foo a
3

Does this conflict with the idea of immutable variables?

Many thanks!

### TheoryOverflow

#### Known and described subclasses of Context-Free Grammars class

I'm looking for various researches which consider specific subclasses of Context-Free Grammar class, i.e. some specific described cases, which differ from well-known:

• deterministic/non-deterministic
• ambiguous/unambiguous
• regular/non-regular

As an example of such "non-standard" subclasses are Visibly pushdown grammar described here. Any additional examples will be much appreciated.

More specifically, I'm wondering, are any described subclasses which can help distinguish this two very similar cases?

$G_1$:

$S\rightarrow aD$, $S\rightarrow cD$

$D\rightarrow abc$, $D\rightarrow abDc$

$G_2$:

$S\rightarrow aD$, $S\rightarrow cD$

$D\rightarrow abc$, $D\rightarrow abDc$, $D\rightarrow aDb$, $D\rightarrow aDc$

It is clear that both of them are unambiguous non-determ. CFG, and $\mathcal{L}(G_1)\subset\mathcal{L}(G_2)$. But these are common properties, I'm looking for specific differences.

### QuantOverflow

#### How can you find change in working capital and capital expenditures without a balance sheet?

I'm working with the following information trying to work through a valuation exercise and I'm absolutely stuck. How can I find ∆WC and CAPX with this information?

### CompsciOverflow

#### What is the amortized time complexity of inserting an element to this heap?

Assume you implement a heap using an array and each time the array is full, you copy it to an array double its size. What is the amortized time complexity (for the worst case) of inserting elements into the heap?

I think that we have $T(n) = n \cdot n$ (which is an upper bound on the total cost of a sequence of n operations in the worst case), and then the amortized complexity according to one formula is $\frac{T(n)}{n} = \frac{n^2}{n} = n$.

But I think it is very wrong because it is very clear from intuition that I should get $\log(n)$ ... So how should I calculate this?

### StackOverflow

#### Basic softmax model implementation on 150x150 images

In my learning of tensorflow I've tried to adapt the basic softmax MNIST example to work on my own image set. It's aerial photographs of buildings and I want to classify them by roof type. There are 4 such classifications that can be made.

The simple (maybe naive) idea was to resize the images (since they're not all the same) and flatten them. Then change the tensor shapes in the code and run it. Of course it doesn't work though. First let me show you the code.

filenames = []
with open('/home/david/DSG/id_train.csv') as csvfile:
one_hot_vec = [0, 0, 0, 0]
one_hot_vec[int(row[1])-1] = 1
filenames.append("/home/david/DSG/roof_images/" + str(row[0]) + ".jpg")

sess = tf.InteractiveSession()

filename_q = tf.train.string_input_producer(filenames)
__img = tf.image.decode_jpeg(value, channels=1)
_img = tf.expand_dims(tf.image.convert_image_dtype(__img, tf.float32),0)
img = tf.image.resize_nearest_neighbor(_img, [150,150])

# Actual model
x = tf.placeholder(tf.float32, [None, 22500])
W = tf.Variable(tf.zeros([22500, 4]))
b = tf.Variable(tf.zeros([4]))
y = tf.nn.softmax(tf.matmul(x, W) + b)

# Training algorithm
y_ = tf.placeholder(tf.float32, [None, 4])
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y,1e-10,1.0)), reduction_indices=[1]))

# Evaluate model, this checks the results from the y (prediciton matrix) against the known answers (y_)
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

coord = tf.train.Coordinator()
init_op = tf.initialize_all_variables()
sess.run(init_op)

# Loads and process all the images, adding them to an array for later use
images = []
for i in range(8000):
if i % 100 == 0:
print("Processing Images " + str(100*(i+100)/8000) + "% complete")
image = img.eval().flatten()
images.append(image)

# Train our model
for i in range(80):
print("Training the Model " + str(100*(i+1)/80) + "% complete")
batchImages = images[i*100:((i+1)*100)]
# Here's a debug line I put in to see what the numbers were

coord.request_stop()

As can be seen I print the y values from softmax as I'm going along. The result is tensors that exclusively look like this [0., 0., 0., 1.]. I thought this was pretty strange. So I printed the value of tf.matmul(x, W) + b.

[[-236.86216736 -272.89904785   59.67744446  450.08377075]
[-327.19482422 -384.06918335   87.47353363  623.79052734]
[-230.79460144 -264.78787231   60.29759598  435.28485107]
[-188.10324097 -212.30155945   53.8230629   346.58175659]
[-180.26617432 -209.45767212   48.90292358  340.82092285]
[-177.13232422 -200.59474182   45.97179413  331.75531006]
[-225.94104004 -258.97390747   61.54353333  423.37136841]
[-259.33599854 -290.73773193   67.69062042  482.38308716]
[-151.53468323 -174.09906006   39.97481537  285.65893555]
[-237.23356628 -272.71789551   65.12500763  444.82647705]
..... you get the idea
[-195.14971924 -221.30851746   53.09790802  363.36032104]
[-157.30508423 -175.47320557   40.4044342   292.37384033]
[-178.94332886 -203.36262512   47.0838356   335.22219849]
[-180.61688232 -200.0609436    45.12242508  335.55541992]
[-145.7559967  -163.06838989   35.25980377  273.56466675]
[-194.07254028 -213.78709412   53.14990997  354.70977783]
[-191.92044067 -219.13395691   49.84062958  361.21377563]]

For the first, second and third elements, calculating softmax manually, you get numbers of the order of E-200, essentially zero. And then a number above 1 for the fourth element. Since they all follow this pattern, clearly something is wrong.

Now I've checked the input's, I have my answers as one hot vectors like so [0, 1, 0, 0] and my images are flattened and the values normalized to 0 and 1 (floats). Just like the MNIST example.

I also noticed that in the MNIST example the values from matmul are much smaller, of the order of E0. Is that because there are 784 elements in each image, as opposed to 22500? Is this the cause of the problem?

Heck maybe this will never work for some reason. I need some help.

EDIT: I decided to check if the image size was having any effect, and sure enough the matmul does give smaller numbers. However they still exhibit a pattern and so I ran it through softmax again and got this output:

[[  2.12474524e-20   1.00000000e+00   1.10456488e-18   0.00000000e+00]
[  3.22400550e-21   1.00000000e+00   1.24568592e-19   0.00000000e+00]
[  2.49283055e-28   1.00000000e+00   6.52334536e-26   0.00000000e+00]
[  4.73190862e-23   1.00000000e+00   3.71980738e-21   0.00000000e+00]
[  1.11151765e-26   1.00000000e+00   4.14652626e-24   0.00000000e+00]
[  2.23096276e-22   1.00000000e+00   7.21511359e-21   0.00000000e+00]
[  1.41888640e-23   1.00000000e+00   2.13637447e-21   0.00000000e+00]
[  3.55662848e-17   1.00000000e+00   5.14018079e-16   4.06785808e-33]
[  8.25783417e-26   1.00000000e+00   2.95267040e-23   0.00000000e+00]
[  1.09395607e-25   1.00000000e+00   3.76775998e-23   0.00000000e+00]
[  9.34879669e-13   1.00000000e+00   1.07488766e-11   7.21446627e-25]
[  3.09687017e-34   1.00000000e+00   5.22547065e-31   0.00000000e+00]
[  2.10362117e-22   1.00000000e+00   1.31067148e-20   0.00000000e+00]
[  5.86830220e-23   1.00000000e+00   9.55902033e-21   0.00000000e+00]
[  9.59656235e-17   1.00000000e+00   2.98987045e-15   7.10348533e-32]
[  2.33712669e-16   1.00000000e+00   3.26934410e-15   1.55066807e-31]
[  1.09302052e-27   1.00000000e+00   5.34793657e-25   0.00000000e+00]
[  1.67101349e-25   1.00000000e+00   1.15098012e-22   0.00000000e+00]
[  4.46111042e-26   1.00000000e+00   1.23599421e-23   0.00000000e+00]
[  1.31791856e-24   1.00000000e+00   2.25831162e-22   0.00000000e+00]
[  2.19408324e-12   1.00000000e+00   5.67631081e-11   1.22608556e-23]]

Something else must be wrong then.

### CompsciOverflow

#### Understanding The Mapping Of Edges to Nodes In A Graph Theory Problem

I am really confused with this problem.

Here's the problem:

You have $N$ points numbered $1$ through $N$,inclusive, and $N$ arrows again numbered $1$ through $N$,inclusive. No two arrows start at the same place, but multiple arrows can point to the same place and arrows can start and end in the same place. The arrow from place $i$ points to place $a[i-1]$,($a$ being an array representing the game board with $N$ elements and $i$ is between $1$ and $N$, inclusive).There are $0$ to $N$ tokens,inclusive, placed in those places and that, in each round, move along the arrows from their current place. If two or more tokens are in the same place, then you lose that game. But if that doesn't happen for the $K$ rounds specified, then you win the game. There may be multiple ways to solve the problem, but Two ways are different if there is some $i$ such that at the beginning of the game place $i$ did contain a token in one case but not in the other. Count those ways and return their count modulo $1,000,000,007$.

The whole problem is confusing to me, but what really confuses me is that it states that the arrow that starts from $i$ goes to $a[i-1]$. How I understand it, for the first example,( $\{1,2,3\} \;5$ Returns:$8$ ), if $a[1]=1$, $a[2]=2$, and $a[3]=3$, then $3$ maps to $2$ and $2$ maps to $1$, but then $1$ maps to $0$,(but point $0$ doesn't exist).

What would be more correct would be if $a[0]=1$, $a[1]=2$, and $a[2]=3$, but then all the points would map to themselves,(though it says in the example that the tokens don't move during the rounds).

I am probably way off, but I couldn't find many explanations, and the ones I found didn't make any sense to me, and I couldn't find many visual depictions either.

#### ORAM: Is it generic?

Recently, plenty of researchers are looking at designing efficient data-oblivious algorithms. Roughly speaking, an algorithm is said to be data-oblivious if its data access patterns are independent of the input i.e. the data access pattern of the algorithm does not leak any information about the input of the algorithm. Some of the algorithms already looked at include sorting, BFS, minimum spanning tree and convex-hull. These data-oblivious algorithms have various applications for the cloud and efficient secure computation.

My question: Is ORAM a generic method to transform any non-data-oblivious algorithm to its data-oblivious counterpart?

### TheoryOverflow

#### How do you get the Calculus of Constructions from the other points in the Lambda Cube?

The CoC is said to be the culmination of all three dimensions of the Lambda Cube. This isn't apparent to me at all. I think I understand the individual dimensions, and the combination of any two seems to result in a relatively straightforward union (maybe I'm missing something?). But when I look at the CoC, instead of looking like a combination of all three, it looks like a completely different thing. Which dimension do Type, Prop, and small/large types come from? Where did dependent products disappear to? And why is there a focus on propositions and proofs instead of types and programs? Is there something equivalent that does focus on types and programs?

### CompsciOverflow

#### Is this probability distribution data structure already discovered?

Problem formulation

I have implemented an interesting probability distribution data structure: having $n$ elements $a_1, a_2, \dots, a_n$ in the data structure with respective positive weights $w_1, w_2, \dots, w_n > 0$, upon request each $a_i$ may be returned with probability $$\frac{w_i}{\sum_{j = 1}^n w_j}.$$

A Java implementation of my data structure can be obtained here, yet I will specify the algorithm here textually:

There are two types of nodes, relay nodes and leaf nodes. The actual elements with their actual weights are stored in leaf nodes. What comes to relay nodes, we allow only those that have both children (the tree is binary). However, each relay node know how many leaves is there in the subtree starting from that very relay node; also each relay node knows the sum of weights of all its leaves.

Whenever adding a new element, we start traversing the tree from its root, and until we reach a leaf node, we choose that relay node, whose node count is smallest; this way it maintains a logarithmic height.

What comes to the removal of an element, the data structure maintains a hashtable mapping each element to its leaf node. So, first we get the leaf node, remove it, after which, we replace the parent relay node with the sibling leaf.

Whenever sampling an element, we start from root, and choose an appropriate relay node until we reach the appropriate leaf node, whose element we return.

All three basic operations seem to run in worst-case $\mathcal{O}(\log n)$, and my benchmarking supports the observation.

A simple visualization:

Question

Is this data structure already discovered? If yes, could you tell me the name of the paper that discusses it?

### StackOverflow

#### How to prove the reliability of a predictive model to executives?

I trained data from 500 devices to predict their performance. Then I applied my trained model to a test data set for another 500 devices and show pretty good prediction results. Now my executives want me to prove this model will work well on one million devices not only on 500. Obviously we don't have data for one million devices. And if the model is not reliable, they want me to discover the required amount of train data in order to make a reliable prediction on one million devices. How should I deal with these executives who don't have a background in statistical analysis and modeling? Any suggestions? Thanks

#### Does Apache Spark mllib Word2Vec gives vectors of bag of words or of individual word?

I have a txt document which I want to read and convert the individual words into its vector representation form. What I am getting is that, I am getting vector representation of every line which is a collection of words.

#### Represent a list of items in input CSV for H2O

How do I represent a set/list of items in the input data (data frame) for H2O?

I'm using sparkling water 1.6.5 with H2O Flow. My input data (columns in the CSV file) look like this:

age: numeric
gender: enum
hobbies: ?
sports: ?

hobbies and sports are lists/sets with a limited number of possible entries (~20 each). H2O does not seem to have a suitable data type for this. How do I export these into a CSV file that can be processed by H2O Flow?

#### Tensor Flow all predictions are 0

I'm running the following code for TensorFlow and all the probabilities are NaN and all the predictions are 0. The accuracy works, however. I have no idea how to debug this. Any and all help is appreciated.

x = tf.placeholder("float", shape=[None, 22])
W = tf.Variable(tf.zeros([22, 5]))

y = tf.nn.softmax(tf.matmul(x, W))
y_ = tf.placeholder(tf.float32, [None, 5])

cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
#cross_entropy = -tf.reduce_sum(tf_softmax_correct*tf.log(tf_softmax  + 1e-50))
init = tf.initialize_all_variables()

sess = tf.Session()
sess.run(init)

for i in range(100):
batch_xs, batch_ys = random.sample(allTrainingArray,100), random.sample(allTrainingSkillsArray,100)
sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

#test on itself
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print "accuracy", sess.run(accuracy, feed_dict={x: batch_xs, y_: batch_ys})

probabilities = y
print "probabilities", probabilities.eval(feed_dict={x: allTrainingArray}, session=sess)

prediction=tf.argmax(y,1)
print "predictions", prediction.eval(feed_dict={x: allTrainingArray}, session = sess)

### Fefe

#### 2014 hat Schwedens Militär ja überraschend russische ...

2014 hat Schwedens Militär ja überraschend russische U-Boote in schwedischen Hoheitsgewässern gesichtet, woraufhin bei Saab ein Großauftrag der Regierung einging, auch mal zwei fette neue U-Boote zu bauen.

Meine Meldung damals. Es gab da aber mehrere "Sichtungen".

Aber hey, RT und Sputnik, wenn der Finger der Anklage auf Russland zeigt, dass sind ja total unglaubwürdige Quellen, nicht wahr? Junge Welt, Telepolis dazu. Oh, und: Springer. Wer den Unterschied findet, darf ihn behalten.

#### Ich finde es bemerkenswert, dass die EU besser auf ...

Ich finde es bemerkenswert, dass die EU besser auf den Brexit vorbereitet war als die Briten selbst. Wenn man sich den Fallout gerade so anschaut, dann werden sich die Briten echt tief bücken müssen, wenn sie jemals wieder rein wollen. Die EU ist sich ja sonst über nichts einig, aber darüber herrscht Konsens.

Auf der anderen Seite befürchte ich gerade, dass die ein-zwei inhaltlich korrekten Punkte der Kritik der Briten jetzt vergiftet werden. Die EU ist ja wirklich eine undemokratische, intransparente Bullshit-Bürokratur. Die Stimmen der Bürger aus verschiedenen Ländern zählen im Parlament unterschiedlich viel, das Parlament verdient den Namen eh nicht, denn Gesetze vorschlagen können sie nicht. Das kann nur die Kommission. Und die besteht nicht aus Vertretern, sondern aus nicht demokratisch legitimierten Botschaftern der Regierungen der Länder. Oder fühlt sich von euch jemand von Juncker, Oettinger oder Malmström vertreten?

Ich weiß nicht, wie es euch geht, aber das waren auch schon die Namen aus der EU-Kommission, die ich kenne. Und die kenne ich nur wegen ihrer Skandale oder weil sie so auffallend schlecht arbeiten, dass man sie deshalb kennt.

Aber diese Details wird in Zukunft niemand mehr ansprechen können, ohne mit Verweis auf den verkackten Brexit weggewischt zu werden.

So gesehen hätten die Briten die EU gar nicht nachhaltiger stärken können als über ihren Rohrkrepierer-Brexit jetzt. Wenn die Briten das verstehen, wird da das Heulen und Zähneknirschen groß sein.

#### Oh und eine Auflösung für eine Medienkompetenzübung ...

Oh und eine Auflösung für eine Medienkompetenzübung habe ich noch: Zu den Aprikosenkernen. Ich meinte damit eigentlich eine Amazon-Suche, aber ein Einsender fand diese wunderschöne Seite :-)

### StackOverflow

#### scala functional - methods/functions inside or outside case class?

as a beginner in Scala - functional way, I'm little bit confused about whether should I put functions/methods for my case class inside such class (and then use things like method chaining, IDE hinting) or whether it is more functional approach to define functions outside the case class. Let's consider both approaches on very simple implementation of ring buffer:

1/ methods inside case class

case class RingBuffer[T](index: Int, data: Seq[T]) {
def shiftLeft: RingBuffer[T] = RingBuffer((index + 1) % data.size, data)
def shiftRight: RingBuffer[T] = RingBuffer((index + data.size - 1) % data.size, data)
def update(value: T) = RingBuffer(index, data.updated(index, value))
def length: Int = data.length
}

Using this approach, you can do stuff like methods chaining and IDE will be able to hint methods in such case:

val buffer = RingBuffer(0, Seq(1,2,3,4,5))  // 1,2,3,4,5
val buffer2 = buffer.shiftLeft.shiftLeft  // 3,4,5,1,2

2/ functions outside case class

case class RingBuffer[T](index: Int, data: Seq[T])

def shiftLeft[T](rb: RingBuffer[T]): RingBuffer[T] = RingBuffer((rb.index + 1) % rb.data.size, rb.data)
def shiftRight[T](rb: RingBuffer[T]): RingBuffer[T] = RingBuffer((rb.index + rb.data.size - 1) % rb.data.size, rb.data)
def update[T](value: T)(rb: RingBuffer[T]) = RingBuffer(rb.index, rb.data.updated(rb.index, value))
def head[T](rb: RingBuffer[T]): T = rb.data(rb.index)
def length[T](rb: RingBuffer[T]): Int = rb.data.length

This approach seems more functional to me, but I'm not sure how practical it is, because for example IDE won't be able to hint you all possible method calls as using methods chaining in previous example.

val buffer = RingBuffer(0, Seq(1,2,3,4,5))  // 1,2,3,4,5
val buffer2 = shiftLeft(shiftLeft(buffer))  // 3,4,5,1,2

Using this approach, the pipe operator functionality can make the above 3rd line more readable:

implicit class Piped[A](private val a: A) extends AnyVal {
def |>[B](f: A => B) = f( a )
}

val buffer2 = buffer |> shiftLeft |> shiftLeft

Can you please summarize me your own view of advance/disadvance of particular approach and what's the common rule when to use which approach (if any)?

Thanks a lot.

#### How to put more weight on certain features in machine learning?

If using a library like scikit-learn, how do I assign more weight on certain features in the input to a classifier like SVM? Is this something people do or is there another solution to my problem?

### Planet Theory

#### Postdoc In Combinatorial Optimization at ETH Zurich (apply by July 31)

The Combinatorial Optimization group at ETH Zurich, headed by Prof. Rico Zenklusen, has an opening for a Postdoctoral Researcher. The position is initially for one year with the possibility of extension for a second year. The starting date is flexible between fall 2016 and spring 2017.

Email: ricoz@math.ethz.ch

### StackOverflow

#### Obtaining exact output values of SMO classification, before clearly demarcating them into classes

While running the SMO classifier in weka, if I have inputted my training labels as 0 and 5, (A binary set), then while running the classifier model on test data, are the outputs some decimal values between 0 and 5 which get distinctly classified into the two binary labels at a later step, or are there no intermediate decimal values?

If they exist, how to obtain these intermediate decimal values?

(Eg, In the above model, does the smo/svm classifier assign values like 1 , 2, 3 and 4, or some other decimal/float value within the given range and then these get appropriately grouped under the 0 and 5 value classes).

### Fefe

#### Hat hier jemand einen "Smart TV" von Samsung?Und war ...

Hat hier jemand einen "Smart TV" von Samsung?

Und war so dämlich, ihn ans Internet anzuschließen?

Und jetzt bleibt der immer 30 Sekunden nach Anschalten hängen?

m(

### CompsciOverflow

#### Checking membership in DFA with fixed length using AC1 circuit?

I'm supposed to find circuits, which can solve the question of membership in a regular language A with fixed length. The depth is limited by O(log(n)) and the size by O(n). Divide and Conquer should be the way to go, but I always exceed the max size. Would really appreciate any help.

### StackOverflow

#### How to change the color of a line with different pattern classifications in python

I have a time series dataset that I used a machine learning algorithm to identify distinct patterns. I have that classified but I want to check it visually to see how it did.

How do I make a time series graph with different colors for each pattern classification OR what is the best way to visualize or check for accuracy with time series classification data?

The data basically looks like this

DATE DEMAND CLASSIFICATION June 4 678 1

Generally the classification would look like this 0000000000000000011111111111111111110000000000000000000000000022222222222222222 etc.

Any help?

### CompsciOverflow

#### weaker condition than e-representative will suffice for agnostic PAC?

We were wondering, why is it necessary to demand that $|L_S(h)-L_D(h)|<\epsilon$ be true simultaneously for all $h\in H$. A much weaker demand would be that the inequality be true simultaneously for any two hypotheses. We know that it's not true because it would mean that all hypothesis classes are PAC learnable, but we can't find the problem with the following reasoning: Given $\epsilon, \delta$, use Hoeffding's inequality to find $m$ s.t. $|L_S(h)-L_D(h)|<\epsilon/2$ with probability $\ge \sqrt{1-\delta}$. Now, let $h$ be a hypothesis that minimizes $L_S(h)$ and let $h^*$ be one that minimizes $L_D(h)$. Then, with probability $\ge 1-\delta$ we have: $L_D(h)\le L_D(h^*)+\epsilon$, as all we need is that the inequality will hold for both $h$ and $h^*$ and it does so with probability greater than $1-\delta$. Where is the mistake?

Thanks!

### QuantOverflow

#### How to take care of newly auctioned yield/price in fixed income data

This is a financial data cleaning question. I have raw price and yield data for US cash treasury across the curve. In the time-series there are jumps on the day after the treasury auction results come out. Prior to using the data, is it good practice to manually remove the jumps? Thanks.

### TheoryOverflow

#### Deciding whether a unary context-sensitive language is regular

It is a well-known result that the question

Does a context-free grammar generate a regular language?

is undecidable. However, it becomes decidable on a unary alphabet, simply because in this case, the classes of context-free and regular languages coincide.

My question is to know what happens for unary context-sensitive languages.

Is it decidable to know whether a given context-sensitive grammar on a unary alphabet generates a regular language.

If the answer is positive, an estimation of the complexity would be welcome.

### CompsciOverflow

#### Finding all soldier wins

One day a castle is attacked at sunrise (by surprise) by n soldiers.

Each soldier carries a canon and a rifle.

The castle has strength s.

On the first day each soldier shoots his canon at the castle costing the castle n strength points (i.e. the castle ends the first day with s=s-n strength points). After all the soldiers have fired, the castle sends dpw defenders to battle them.

In the ensuing days the castle and the soldiers battle it out following these rules:

1. All the soldiers fire first. A soldier can fire his canon at the castle or his rifle at one of the defenders (but not both and each soldier can only shoot once). One shot at a defender kills him. One shot at the castle decreases its strength by 1.
2. Then each of the d defenders shoots at one soldier (and only one) killing him.

3. If the castle still has strength points (i.e. s>0) it sends a new batch of dpw defenders at this point. The total number of defenders in the next round will be d=d+dpw.

4. Repeat 1 through 3 on each new day.

5. If all soldiers are killed by the defenders, the castle wins.

6. If there are zero defenders after the soldiers shoot and the castle strength is zero, the soldiers win.

The answer to this question gives the strategy the soldiers should follow to win in a minimum number of rounds.

If $$n\leq s<2n, dpw\geq n, 2n<dpw+s<(\phi+1)n$$, the strategy that leads to a victory for the soldiers in the minimum number of rounds consists of the soldiers shooting only at the castle in the first and second days and destroying it, and from then on, the soldiers trade fire with the remaining defenders winning in a total of $2+\max(k,0)$ days, where $$k=\left\lceil\frac{\log_\phi{\left(\frac{n+(dpw+s-2n)\phi^{-1}}{n-(dpw+s-2n)\phi}\right)}}{4}\right\rceil.$$

If $dpw<n$, the soldiers can win in a minimum of $c+\max(k,0)$ days, where $$c = \left\lceil\frac{\sqrt{dpw^2+4 n\cdot dpw}-dpw+2 n-2 s}{2 (dpw-n)}\right\rceil$$ and $$k = \left\lceil\frac{\log_\phi{\left(\frac{n+(s + c\cdot dpw-(c+1)n)\phi^{-1}}{n-(s + c\cdot dpw-(c+1)n)\phi}\right)}}{4}\right\rceil.$$ In this case they shoot, in each round, at the defenders first then the castle for $c$ days which is the time it takes to destroy the castle.

What is an algorithm to find systematically all the strategies that lead to a win by the soldiers not just the minimum day strategy?

For example if $n=10, s=12, dpw=10$, there are 2 solutions in which the soldiers win: The first where the number of soldiers, defenders and castle strength goes like this (in 3 rounds)

start: 'soldiers': 10, 'defenders': 0, 'castle-strength': 12

round1:'soldiers': 10, 'defenders': 10, 'castle-strength': 2

round2:'soldiers': 8, 'defenders': 2, 'castle-strength': 0

round3:'soldiers': 8, 'defenders': 0, 'castle-strength': 0

and the second solution where it goes like this (4 rounds)

start: 'soldiers': 10, 'defenders': 0, 'castle-strength': 12

round1:'soldiers': 10, 'defenders': 10, 'castle-strength': 2

round2:'soldiers': 9, 'defenders': 11, 'castle-strength': 1

round3:'soldiers': 6, 'defenders': 3, 'castle-strength': 0

round4:'soldiers': 6, 'defenders': 0, 'castle-strength': 0

(in the particular case where $dpw=n$, like in the example above, there is an infinite number of solutions as @rotia points out, where the 10 soldiers eliminate the 10 defenders in either the 3rd or 4th or 5th... round before destroying the castle)

### QuantOverflow

#### Where can I get equivalent of 3 months libor or swap historical data?

I am looking for 5 years of libor/swap data for major currencies. Daily, or even better hourly.

Is this available anywhere?

An example of what I would like is: Bloomberg ADSW2 CMPL Curncy.

Is there a free equivalent?

### TheoryOverflow

#### Finding a minimum tree which is isomorphic to a subtree of $T_1$ but not to a subtree of $T_2$

Consider the problem that receives two trees $T_1$, $T_2$, and asks to find a minimum size tree $T$ such that there exists a subtree of $T_1$ which is isomorphic to $T$, but there is no such isomorphic subtree in $T_2$.

What is known about the complexity of this problem?

### QuantOverflow

#### Time series of European sovereign credit ratings by the Big Three?

I would need time series, from 2000 to 2015 (if possible) of sovereign credit ratings by Moody's, S&P and Fitch. Could you suggest me a source or provide me such a dataset? Thank you very much!

### CompsciOverflow

#### How to efficiently determine whether a given ladder is valid?

At my local squash club, there is a ladder which works as follows.

1. At the beginning of the season we construct a table with the name of each member of the club on a separate line.
2. We then write the number of games won and the number of games played next to each name (in the form: player wins/games).

Thus at the beginning of the season the table looks like this:

Carol 0/0
Billy 0/0
Alice 0/0
Daffyd 0/0

Any two players may play a match, with one player winning. If the player nearest the bottom of the table wins, then the position of the players is switched. We then repeat step 2., updating the number of wins and games next to each player. For example, if Alice beats Billy, we have

Carol 0/0
Alice 1/1
Billy 0/1
Daffyd 0/0

These matches go on throughout the season and eventually result in players being listed in approximate strength order.

Unfortunately, the updating happens in a rather haphazard way, so mistakes are made. Below are some examples of invalid tables, that is, tables which could not be produced by correctly following the above steps for some starting order (we have forgotten the order we used at the beginning of the season) and sequence of matches and results:

Alice 0/1
Billy 1/1
Carol 0/1
Daffyd 0/0

Alice 2/3
Billy 0/1
Carol 0/0
Daffyd 0/0

Alice 1/1
Billy 0/2
Carol 2/2
Daffyd 0/1

Given a table, how can we efficiently determine whether it is valid? We could start by noting the following:

1. The order of the names doesn't matter, since we have forgotten the original starting order.

2. The total number of wins should be half the sum of the number of games played. (This shows that the first example above is invalid.)

3. Suppose the table is valid. Then there is a multigraph - a graph admitting multiple edges but no loops - with each vertex corresponding to a player and each edge to a match played. Then the total number of games played by each player corresponds to the degree of the player's vertex in the multigraph. So if there's no multigraph with the appropriate vertex degrees, then the table must be invalid. For example, there is no multigraph with one vertex of degree one and one of degree three, so the second example is invalid. [We can efficiently check for the existence of such a multigraph.]

So we have two checks we can apply to start off with, but this still allows invalid tables, such as the third example. To see that this table is invalid, we can work backwards, exhausting all possible ways the table could have arisen.

I was wondering whether anyone can think of a polynomial time (in the number of players and the number of games) algorithm solving this decision problem?

### StackOverflow

#### Text classification performance

So I am using the textblob Python library, but the performance is lacking.

I already serialize it and load it before the loop( using pickle ).

It currently takes ~0.1 (for small training data) and ~0.3 on 33'000 test data. I need to make it faster, is it even possible?

# Some code:

# Pass trainings before loop, so we can make performance a lot better

# Specify which classifiers are used by which classes
filter_classifiers = get_classifiers_by_resource_names(trained_text_classifiers, config["FILTER_CLASSIFICATORS"])
signal_classifiers = get_classifiers_by_resource_names(trained_text_classifiers, config["SIGNAL_CLASSIFICATORS"])

for (url, headers, body) in iter_warc_records(warc_file, **warc_filters):
start_time = time.time()
body_text = strip_html(body);

# Check if url body passess filters, if yes, index, if no, ignore
if Filter.is_valid(body_text, filter_classifiers):
print "Indexing", url.url
else:
print "\n"
print "Filtered out", url.url
print "\n"
resp = 0

This is the loop which performs a check on each warc file's body and metadata.

there are 2 text classification checks here.

1) In Filter( very small training data ):

if trained_text_classifiers.classify(body_text) == "True":
return True
else:
return False

2) In index_document( 33'000 training data ):

prob_dist = trained_text_classifier.prob_classify(body)
prob_dist.max()

# Return the propability of spam
return round(prob_dist.prob("spam"), 2)

The classify and prob_classify are the methods that take the tool on performance.

#### MFCC Features Training with GMM-SVM: How can I fit the SVM by using the output matrix of each GMM prediction matrices

I'm currently doing a speech recognition and machine learning related project. I'm taking MFCC vectors for wav files, and for each class that those wav files relate to, I'm creating a GMM classifier. I have two classes now, therefore I also have two GMM classifiers.

Each GMM classifier is trained with two dimensional MFCC vectors. At this point I'm planning to train a SVM by using those GMM classifier results, but I'm having trouble with how to interpret the prediction vectors and how to train the SVM by using these several GMM classifiers per audio file.

def classify(mfccvalue):
#mfccvalue: is the mfcc vectors of the test sample.
clf = svm.SVC(gamma=0.001, C=100., probability=True)
for name, classifier in classifiers.iteritems():
#name is the class label, classifier is the GMM Classifier
print name, classifier.predict(mfccvalue)
#that returns me the label and vector such as
#> hope [1 0 0 0 1 .....1]

At this point I'm not sure how to train the svm classifier. I thought of training by using svm.fit(classifier) But this method, trains the SVM by using the test data's prediction that is created by GMM and I am not sure if it is a meaningful approach.

I'm probably missing some information regarding to SVM training pipeline or using the GMM in a wrong way. Any information and improvements are well appreciated.

Edit: A better question would be, How can I fit the SVM by using the output matrix of each GMM prediction matrices. An example output of a GMM classifier for a particular label is something like this:

[[  9.99999999e-001   7.40924001e-010]
[  9.99999957e-001   4.32550136e-008]
[  1.00000000e+000   4.71021749e-012]
[  3.02192163e-002   9.69780784e-001]
[  2.15305731e-069   1.00000000e+000]
[  8.76744215e-086   1.00000000e+000]
[  2.18972304e-075   1.00000000e+000]
[  2.27091527e-050   1.00000000e+000]
[  3.08805294e-026   1.00000000e+000]
[  9.99999976e-001   2.42270712e-008]
..
[  1.07138606e-002   9.89286139e-001]]

### Lobsters

#### HTML5 Pong in 20min with C#/Bridge.NET

I recorded a short (20min) video of creating a simple Pong game in HTML5 using C#/Bridge.NET. It’s not the best Pong game in the world but it is from scratch with no libraries other than Bridge. Maybe useful if you’re thinking of picking up Bridge or building a simple Canvas game.

### TheoryOverflow

#### Can emptiness of reversal-bounded counter languages be decided in time polynomial to the number of counters?

I was reading this paper, about the complexity of decision problems for reversal bounded counter machines. I got to Theorem 1 on Page 6. The theorem shows that there's a log-space NTM which can determine if a non deterministic reversal-bounded counter machine is empty or not. (A log-space NTM can be converted into a polynomial time DTM).

The proof shows that, for input machine represented as a string of length $n$, with $m$ counters, that $O(m\log n)$ space is required.

Here's where I get lost. The paper says that, since $m$ is fixed, we can consider the machine to take $O(\log n)$ space.

Does this mean that the algorithm only uses log-space if $m$ is fixed? Would the corresponding deterministic algorithm then be exponential in terms of $m$?

#### "Impredicative" in type theory

I am confused. I think I've read two usages of the word "impredicative" in type theory:

1. When people talk about the "impredicative" version of Martin-Löf's type theory, which they say it is inconsistent. Apparently they call it impredicative because of Type : Type.
2. When people talk about the "impredicative" Prop in CoC. Apparently they call it impredicative because you can quantify over Prop and get Prop stuff.

Is it the case? One notion of impredicativity implies the other? Is there no problem (i.e. inconsistency) by assuming "Prop impredicative"?

### QuantOverflow

#### DAX - company's weights

How often are company's weights being changed on DAX?

Where can I find historical data of DAX weights?

### StackOverflow

#### Swift compiler recursive code optimisations (similar to Haskell)

Does the Swift compiler use fusion to optimise code?

Let's say we want to write code to calculate the sum of the square roots of all positive numbers in a list. In Haskell you can write

sumOfSquareRoots xs = sum (allSquareRoots (filterPositives xs))
where
allSquareRoots []     = []
allSquareRoots (x:xs) = sqrt x : allSquareRoots xs

filterPositives []
= []
filterPositives (x:xs)
| x > 0     = x : filterPositives xs
| otherwise = filterPositives xs

This code is quite easy to read (the first line is very neat - almost English; the parts after the where are local.) This style also makes use of powerful built-in functions such as sum and we could make the other functions public and have them reused. So, good style.

However, we might be concerned that it is less efficient than having a one-pass function. (It passes through the list first to filterPositives then to get allSquareRoots of this and finally to sum this up.) Due to Haskell's so-called lazy evaluation execution strategy, however, the overhead is significantly less than in most other languages. Moreover, a good Haskell compiler can usually derive the one traversal version from the more elegant multiple traversal version using a process called fusion.

My question - does the Swift compiler deploy such optimisation strategies when compiling recursive functions?

### CompsciOverflow

#### Reduction from EMPTY to EMPTY_TAPE

I'm trying to show a reduction from $E_{TM} = \{<M> \ : \ L(M) = \emptyset\}$ to $EPSILON_{TM} = \{<M> \ : \ L(M) = \{\epsilon\} \}.$

I've tried the following:

The reduction checks if $w=\epsilon$ and accepts if it's true. Otherwise, it runs $M$ on $w$ and returns whatever it returns. But it has a problem, if $<M,w> \notin E_{TM}, w \neq \epsilon$ the run of $M$ on w might end in accepting even though $w\neq \epsilon$.

Any ideas how to improve this?

Thanks!

### infra-talk

#### Parallelize Development Using Git Worktrees

Recently, I was in a situation in which I really needed two separate copies of my Git repository. I was about to make a full clone of the repository, but I decided to see if Git had a better solution. And in fact, Git introduced the worktree feature not too long ago (as of version 2.5, released July 2015).

A worktree gives you an extra working copy of your repository, and it’s almost as easy as creating a new branch. All you need to do is set up a new worktree like this:

This will set up the directory ../new-worktree-dir as though it were a clone of your repository. You can create this directory anywhere on the same filesystem, but it should be somewhere outside of your main repository directory! You can then proceed to use the worktree directory as usual, checking out branches, pushing upstream, etc.

So why would you want one (or more) worktrees? Here a few good reasons:

## Run Tests While Working on Another Branch

In a large project with very good test coverage, some test suites can take a long time to run—far beyond a convenient amount of time to sit and wait for them to finish. In these cases, it can be helpful to run multiple test suites in parallel. Many IDEs allow opening multiple projects at once, but each must be in its own directory. You could git clone two entirely separate repositories, but worktrees are better:

• Worktrees are implemented using hard links, so they are lightweight and fast (whereas separate git clones copy down the full repository).
• You can share changes between worktrees (as long as they’re committed to at least the local repository). With full clones, you would have to push one repo to the remote and then pull it on the other.
• If you accidentally commit some changes to the wrong clone, you’ll have to port them over by hand (if they’re simple) or by using patch. With worktrees, you can just git cherry-pick and git reset to fix the mistake.

I frequently keep an extra worktree around just for running tests. One limitation of worktrees is that you can’t have the same branch checked out in multiple places. I get around this by creating local temporary branches, like so:

git co -b TEMP/original-branch-name feature/original-branch-name

I use the TEMP prefix to emphasize that the branch is temporary (my shell prompt includes the name of the current branch, so this practically yells it at me). When changes are committed on the original branch, a quick git merge feature/original-branch-name will catch up with the temporary branch.

## Compare Multiple Versions

Sometimes, you need to compare two versions of a project, but a simple diff just doesn’t cut it. What you really need is to see both versions simultaneously so you can compare things side-by-side or even run both versions at the same time. Or perhaps you are in the middle of a complicated change, and it’s hard to tell what you just broke. You can easily check out a previously tagged version or any arbitrary commit in a worktree.

## Work on a Different Branch without Disturbing Your Current Working Copy

Maybe you need to work on a different branch, but your current working directory is in such disarray that even git stash can’t help. Switching branches may also have undesired side effects depending on your project (for example, causing an IDE to re-index).

## Quickly Verify That the Project Works with a Clean Checkout

Everybody has probably had the experience of a build failure because a co-worker forgot to include some files with a commit. It could be that they forgot to add the files to Git, or perhaps some .gitignore rules were too broad. If the build works for them but not for you, then you may be missing some files. One way to find out is to test it against a working copy that you know to be clean. Since worktrees give you a clean checkout, they can be used to verify that all of the files that need to be included have been added to Git. (This will only work reliably if you start by creating a new worktree.)

## Caveats

### Can’t work on the same branch simultaneously

This may seem like a limitation, but it’s really not a big deal. I do most of my work in the main repository directory and have one worktree directory for any/all of the reasons listed above. It’s easy enough to create a temporary branch to mirror an existing one.

### Doesn’t work with submodules

Repositories that utilize submodules currently cannot take advantage of worktrees.

## Cleanup

When you’re all finished with a worktree directory, you can just delete it (no need to use the git worktree prune command, but you can if you want). Git will automatically take care of cleaning up its internal record-keeping as part of its normal periodic garbage collection. But once you start using worktrees, you won’t need to think about how to get rid of them!

The post Parallelize Development Using Git Worktrees appeared first on Atomic Spin.

### StackOverflow

#### Immutable state in FP

This question came to mind when following some tutorials on Scala, but i think is interesting in general when it comes to functional programming.

I am unsure about the importance of immutability in FP. I can think of 2 different cases:

1) A class method does not return the actual fields but a copy of them. For example if we have a class Dog which we need to keep immutable, then its function:

getToys() { return new ArrayList(this.toys); }

getToys() { return this.toys; }

This case makes perfect sense to me, in the 2nd case client code could actually corrupt the object. My skepticism lies in the 2nd case:

2) In Scala and most other FP languages we prefer a recursive call:

sum(x: List[Int], acc: Int) {
if(x.isEmpty) return acc;
}

against a traditional for loop incrementing an accumulator. The reasoning is that this accumulator is a mutable variable.

That variable is never exposed outside the function so why care to make it immutable?

EDIT:

It seems like the most important best practice is referential transparency and not strict immutability, meaning roughly that we don't care about mutable state as long as it is not discoverable by client code. However people still claim that even when mutable state affects local variables (as in my 1st example) the code is more readable or easier to reason about.

Personally, I would argue that a loop is more readable than a recursion.

So the real question is: Why is code using immutable variables considered easier to read / reason about?

### QuantOverflow

#### What is the effect of mean-reversion on an upper barrier knock-out call option?

Consider a mean-reverting normal model for an underlying

$dX^{(1)}_t=-\kappa X^{(1)}_tdt+\sigma^{(1)} dW^{(1)}_t$,

for fixed time-independent constants, $\kappa$ (mean-reversion) and $\sigma^{(1)}$ (volatility) and Brownian motion, $W^{(1)}_t$. Suppose that using this model, I calculate options prices for all $t$, then calibrate the time-dependent local vol, $\sigma_t^{(2)}$, of a second normal model (without mean-reversion)

$dX^{(2)}_t=\sigma_t^{(2)} dW^{(2)}_t$,

so that the two models give the same prices for vanilla options at all times.

Will a continuous upper barrier knock-out call option be cheaper in the first or second model?

For simplicity, take $X_0=Y_0=0$, and assume that the upper barrier, $B$, is larger than the strike, $K$.

### TheoryOverflow

#### If set of all strings is countable so is real numbers?

I have been having this question for really long: if $\Sigma = \{0,1,2,3,4,5,6,7,8,9,.\}$ then obviously $\Sigma^*$ is countable.

But real numbers are just a subset of them, but by diagonalization we can prove that they aren't. What detail am I missing?

#### a linear sort program? [on hold]

I want to know the goal of the following question taken from my text book:

Suppose its known that each of the items in $a\space[1.\space.N]$ has one of two distinct values. Give a sorting method for such arrays that takes time proportional to $N$.

I did look up a few algorithms like count sort and radix sort, which are touted to be linear, but actually aren't: for radix sort it's something like $O(W * N)$. And I even tried the count sort myself.

But is that the goal of the exercise? To conclude that there is no linear sort algorithm (with the exception of spaghetti sort)?

### DragonFly BSD Digest

Did this early too, but ended up with lots of links.

### Fefe

#### Nachdem wir uns neulich über die Google-Anfragen der ...

Nachdem wir uns neulich über die Google-Anfragen der Briten lustig gemacht haben, sind jetzt die Amis dran.
Top trending questions on Hillary Clinton in the US last day.
1. Is Hillary Clinton going to jail?
2. What party is Hillary Clinton in?
3. Will Hillary win?
4. Why shouldn't you vote for Hillary Clinton?
5. Will Bernie support Hillary?

### StackOverflow

#### Writing a function to curry any function

For the record I find it very annoying that functions are not automatically curried in Scala. I'm trying to write a factory that takes in any function and returns a curried version:

def curry(fn:(_ => _)) = (fn _).curried

Basically what I have defined here is a function curry that takes as an argument a function fn that is of type _ => _ and returns a curried version of function fn. Obviously this didn't work, because Java.

This was the error I got:

def curry(fn:(_ => _)) = (fn _).curried

Can any gurus out there help me figure out why this doesn't work? I don't mean to sound snarky; I am used to functional languages treating all types as functions. Please help this Scala newbie.

(I tagged this question with haskell because I'm trying to get Scala functions to behave like Haskell functions :'(

UPDATE

Just to clarify, I need a curryN function, so a function that curries any other function regardless of its arity.

Side note, some people have pointed out that increasing the number of fn's arguments would solve the problem. Nope:

def curry2(fn:((_, _) => _)) = (fn _).curried
def curry2(fn:((_, _) => _)) = (fn _).curried

### CompsciOverflow

#### Greedy algorithm correctness proof (UVA 10716)

Given an input string, not necessarily a palindrome, compute the number of swaps necessary to transform the string into a palindrome. By swap we mean reversing the order of two adjacent symbols (UVA 10716). I've found an AC solution with the following greedy algorithm (from the UVA board): "For each letter find its first and last occurrence in the current string. Select the letter for which the sum of distances from its first occurrence to the beginning and from its last occurrence to the end is smallest. Swap its first occurrence to the beginning and its last to the end. And "eliminate" them --> now you have a word which is shorter. You have to repeat until you have <=1 letters left."

But I can't find the proof (and I can't construct it by myself). The question: how to prove the correctness of the algorithm?

#### Fundamental idea of this memory denial of service simulation using array copy?

I am just watching this lecture Computer Organization - Introduction And Basics where the lecturer mentions using a simple code sample to demonstrate how a memory denial of service can be simulated in DRAM by exploiting an optimization the DRAM Controller does internally. The optimization seems to be that the DRAM Controller serves from the same row buffer whenever possible for the instructions coming down the pipeline, and so it can be exploited by sending instructions that use very localized data, giving them priority.

//initialize large arrays A and B

for(j=0; j<n; j++)
{
index = j*linesize;
A[index] = B[index]
}

I just wanted to understand the fundamental concept of this approach. The Lecturer says that the above code would copy an Array from A to B. However I am not sure how it actually would copy an Array from A to B because it doesn't seem to go over each index item.

My real doubt, assuming that this is just a simulation code which copies some part of B to A for demonstration purpose and that linesize is just a large value to make a cache miss, is if the central idea behind this something like:

a) choose an index stride that is sufficiently wide so that a cache miss happens

b) yet the memory is close enough for it to be in the same row for the memory controller to serve from the same row buffer

Which is needed to simulate this DoS (denial of service) scenario?

Quoting from the paper that explains this in depth Memory Performance Attacks - Denial Of Service In Multi Core Systems

The arrays in stream are sized such that they are much larger than the L2 cache on a core. Each array consists of 2.5M 128-byte elements. Stream (Figure 3(a)) has very high row-buffer locality since consecutive cache misses almost always access the same row (limited only by the size of the row-buffer).

### Fefe

#### Der Indische Supreme Court beschäftigt sich mit dem ...

Der Indische Supreme Court beschäftigt sich mit dem Antrag der Regierung, Whatsapp zu verbieten. Aus Gründen der nationalen Sicherheit, weil die verschlüsseln und das können wir dann nicht mitlesen.

#### Ich weiß nicht, ob ihr darauf geachtet habt, aber ...

Ich weiß nicht, ob ihr darauf geachtet habt, aber Cameron hatte vor Brexit deutlich angesagt, dass ein Austritts-Ergebnis direkt Artikel 50 triggern würde, sozusagen am nächsten morgen wäre das dann offiziell der EU gegenüber.

Davon war dann plötzlich keine Rede mehr, als es soweit war.

Das ist für Cameron-Verhältnisse ein guter Schachzug, denn wieso sollte er sich seinen Abschnitt in den Geschichtsbüchern damit vergiften? Insbesondere wenn er das Gülleschaufeln auch an seine Pro-Brexit-Nachfolger weiterreichen kann?

### Fred Wilson

#### Some Thoughts On Brexit

Clearly the UK’s decision to leave the EU is a big deal. The implications for the UK and Europe are large. And there is a feeling that similar politics will be adopted in other parts of the world. All of that is concerning. But my immediate reaction to the news when I woke to it on Friday morning was this:

I got some flack for being so “opportunist” and “financially minded” on a day when many people were dealing with the the reality that their world was going to change and maybe not for the better.

I realize that things like this are not all about money and financial opportunity. I realize that there will be a lot of pain for some in this transition.

But I am all for change. A static world is not a good world. And, as I said, with change comes opportunity.

Is it possible that the British Pound will be trading higher against the dollar and euro in a few years than it was before the vote? Absolutely. Is it possible that US stocks will recover from the losses they took on Friday and continue the eight year run they have been on? Absolutely. Is it possible that all the hand wringing over the potential for a global economic slowdown brought on by Brexit is overdone? Absolutely.

I am not saying all of these things will happen. But they could.

But more than that, going into a foxhole right now seems like the wrong idea. Some of the best companies have been created in times of great economic turmoil. And, because of that, some of the best venture capital investments have been made in times when everyone was risk averse. I am not for getting too excited when times are good and I am not for getting too conservative when times feel bad. I am all for looking for opportunity at every turn.

### QuantOverflow

#### Correct Alphabet (Google) market cap calculation?

Given the definition: Market capitalization (market cap) is the total market value of the shares outstanding of a publicly traded company; it is equal to the share price times the number of shares outstanding.

I find it quite puzzling to find different numbers everywhere for Google's (now renamed Alphabet) market capitalisation. I have summarised my findings in this Google sheet (https://docs.google.com/spreadsheets/d/1C8sSp7Kf3wdiiYFHCGM2I3qvjESbLjZR04OCFZHBDi0/edit?usp=sharing) hope you can all access it.

It ranges from the totally wrong for Market Cap (see CNBC), to inconsistent (see Nasdaq, WSJ and Yahoo finance) to differences in number of shares (Google finance and Bloomberg.com don't seem to agree on the number of outstanding shares). My aim is first to understand what is the right number for outstanding shares and market cap and second what is the right "price" for the Class B shares that are unlisted.

Data in the sheet is as of Feb 4th, 2016 11AM Sydney time (so based on closing prices, way after market closes).

### StackOverflow

#### learning words by machine learning

I make some operations on a special file which consists of words (just words, not sentences). I want to select the words which have two same letters one after another (like letters, little, all, yahoo). If I put words like these into a seperate file from main file and use naive bayes or another classifier, can I find this kind of words in the main file? Does machine learning work for these kind of words? Thank you in advance.

### CompsciOverflow

#### Is $\{a^nb^n\}\cup\{a^nb^{2n}\}$ LR(k)?

I was reading Knuth's paper "On The Translation of Languages from Left to Right", my particular interest being on RL($k$) languages (not a typo). By the end of the paper, he puts the grammar:

$$S \rightarrow Ac \\ S \rightarrow B \\ A \rightarrow aAbb \\ A \rightarrow abb \\ B \rightarrow aBb \\ B \rightarrow ab$$

Which generates the language $\{a^nb^n\}\cup\{a^nb^{2n}c\}$. He states that this language is clearly RL($k$), which is easy to see, and he proves that it cannot be LR($k$). But, in his proof, he states:

The problem is, of course, the appearance of "c" at the extreme right.

So, my doubt is: if it wasn't for the extra "c", could the language be LR($k$)? He remarks that, of course, the problem is the "c", but I don't see how I could write an LR($k$) grammar for $\{a^nb^n\}\cup\{a^nb^{2n}\}$.

#### Any tools to convert CTL* into an alternating automaton?

While one can use ltl3ba to convert LTL into an alternating automaton, I am not aware of any tools that accept CTL*. Are there any?

### QuantOverflow

#### Stress Testing for VaR

I am trying to perform stress testing for VaR and have taken into consideration two methods:- 1. Sensitivity analysis 2. Historical scenario analysis.

According to the Derivatives Policy group we need to take into consideration 5 factors which are:- o Parallel yield curve in ±100 basis points. o Yield curve shifts of ±25 basis points. o Stock index changes of ±10%. o Currency changes of ±6%. o Volatility changes of ±20%.

1. I am trying to perform the stress testing through sensitivity analysis in excel for which I am not able to figure out how to mould the prices for equities,bonds and derivatives by taking into account above factors through the excel function data table. For instance, if I take into account the 3rd factor mentioned above as STOCK INDEX CHANGES OF +- 10% and one of my stock in my portfolio is listed in Dow Jones, so how can I adjust the prices for a particular time period (say 6 months).?

2.Secondly if I take historical scenario analysis in which I am taking the scenario for instance 1997 Asian crisis, how do I adjust the prices in this scenario also. In this case, for instance, my portfolio contains all the asset class which are issued in the last 10 years and therefore I dont have any data (prices etc.) for them related to the 1997 asian crisis. SO how do I adjust the prices in this case also?.

P.S :-I am using variance covariance method for calculating VaR. Eagerly waiting for valuable suggestions on this.

### Lambda the Ultimate Forum

#### Is there a language with the ability to write arbitrary type functions?

I'm looking for a usable, fully implemented language that supports a kind of 'programming in types'. That is, it should allow the definition of type functions that take types as parameters, return values that are (existing or new) types, and support compile-time logic to implement those type functions. The generated types should of course then be used to instantiate functions that use those types.

I know that some of this is possible in C++, but the underlying type system in C++ is not really suitable.

I know that anything is possible in Lisp, but again I'm really looking for a language with a modern type system and functions on those types at compile time.

For background, this is in support of my Andl project link here. The need is to define generic operations on tuple types as they are transformed by the relational algebra, with full type inference.

### QuantOverflow

#### Deduce expected exposure profile from option/structure delta?

I am thinking about whether there exists a relationship between the delta of an option (or any structured derivative) and its expected positive/negative exposure?

An intuitive question would be the following: a Forward has a Delta of 1, and given the above exposure profile and the Delta of an Option on the same underlying, can I deduce that the exposure profile of the Option equals Delta * Forward_Exposure?

However, after running some simulations I see that this is not the case, part of the reason being (I think) that for exposure generation one simulates values for all relevant risk parameters and not just the one which corresponds to the Delta/sensitivity.

If there are any questions on Definitions of terms I used, I am happy to clarify. Image taken from Jon Gregory's book on CVA.

#### Calculating historical implied volatility

I know that each individual option has its own implied volatility, but how do you go about calculating the overall implied volatility for an underlying?

For example, when someone says the IV of a certain underlying is 40%, they are not referring to a specific option/strike. They mean that the option market as a whole is implying a volatility of 40%. How is that 40% calculated? I'm guessing it is something along the lines of calculating the IV for every option available and taking some sort of average?

Secondly, how do you go about calculating the historical IV over a given time period? For example, in most options trading platforms (eg: TWS, ThinkOrSwim, etc) you can pull up a chart of a specific underlying along with its IV over a given time period. How would you go about recreating that?

Again I presume you do something like:

• One day at a time, get the closing price for every active option
• Calculate the IV for all the options at every strike
• Perform some sort of average
• Move to the next day

It seems impractical to calculate the IV of every single active option. Is it perhaps only done using the front month? (and if so, does that include weeklies and monthlies?)

#### Is there a way to meaningfully generate daily returns from monthly?

I have a set of 7 investments in a portfolio and I need to optimize the weightings based on some exposures to various markets/styles/economic factors. I was hoping to do some sort of simple exposure analysis or 'factor analysis' (not actual Factor Analysis, but more just a bunch of regressions), using daily returns of various risk factors (for example, SPX, TBills, MSCI, FamaFrench Factors, etc).

I only have daily returns for 5 of the 7 investments in the portfolio. I have monthly returns for the remaining two. Is there an easy way to do some sort of generation of daily returns from monthly returns, possibly modelling the monthly against the factors' monthly returns, and then generating daily returns based on the model? (I know this is circular, but I am spitballing.) The problem is that I need some way to tie or anchor the modeled daily returns back to the actual monthly returns.

Any ideas? And does this make sense?

### StackOverflow

#### Is there a (easy) way to connect AWS Redshift to Microsoft Azure ML?

I'm currently trying to evaluate Microsoft Azure ML for my company. I tried it a bit with offline data and it looks promising, but for better evaluation I want to use the online data we have. Our data is stored in Amazon Redshift service, and I couldn't find a way to permanently connect Redshift to Azure ML so it would take the new data, process it and store the results back in Redshift. Is there some way to use Redshift as a data source for Azure ML? Of course, the easier the better

### QuantOverflow

#### Skewed Student t distribution MLE and Simulation

I have Financial LOB data and I feel that a skewed t distribution will fit best. I have a problem trying to find the parameters using MLE numerically, since Matlab's built-in function does not allow for a skewed t-distribution.

Can somebody point me to some code which will find the parameters? Or can someone offer advice for an easy way to do this? I also need to simulate using these parameters but I think this is easier

Cheers

### StackOverflow

#### Unit testing functions - passing dependencies as args vs global access

Consider a scenario where a function A depends on three functions — P, Q and R. P, Q, R themselves have been unit tested as they do some really complex computations. Now I need to test the function A and I have two options —

# Access P, Q, R directly from A

function A (params) {
... P()
... Q()
... R()
}

PROs: No mocking required except for params.
CONs: Unnecessarily testing the logic of P, Q & R.

# Inject P, Q, R as arguments into A

function A (P, Q, R, params) {
... P()
... Q()
... R()
}

PROs: A is tested in a more controlled environment, as P, Q, R as passed as args.
CONs: A lot of effort goes into keeping the mocked functions up to date with their original functions.

I want to know which approach is better of the two and how can I control their respective cons in a better fashion.

NOTE: A, P, Q, R are all pure.

### QuantOverflow

#### Step By Step method to calculating VaR using MonteCarlo Simulations

In trying to find VaR for 5 financial assets with prices over a long period of time(2000 days worth of data) how would I do the following:

1. Carry out monte-carlo simulation in order to find a VaR value, assuming all 5 assets are standard normally distributed.
2. Carry out monte-carlo simulation for all 5 assets to find a VaR value, assuming they follow a student-t distribution with 10 degrees of freedom?

I am trying to do this at both the 95% and 90% confidence levels, and simulate the data with 10,000 replications. Any help would be greatly appreciated. I have already created a Cholesky Decomposition Matrix but not sure how to use the data in it to get the VaR figure.

### StackOverflow

#### What would it take for procedural (but high level) languages like python to implement a functionally pure paradigm? [on hold]

I love python's clear syntax and the huge number of libraries that exist for it but I love that with Haskell you (according to my lecturers) can depend upon it not crashing (as long as you only use pure 'safe' functions).

Would it be possible to get the best of both worlds with a language that already exists or does that kind of paradigm shift require a new language that is separately define?

### Planet Emacsen

#### Michael Fogleman: 2014: A Year of Emacs

My Emacs "birthday" is January 21, 2014. That's right-- I just started using Emacs this year! While it's not quite my one-year birthday, I thought I'd take advantage of the annual review blogging tradition to do a retrospective on my first year in Emacs. Note: much of the following chronology takes place during an extended romp through Asia.

I installed Arch Linux in the Fall of 2013. When using Linux, you often need to edit configuration files. The friend who helped me install it set me up with Vim. I learned the basics--how to insert, save, and quit--and not much else.

In January, I decided it might be fun to give Emacs a try. Here is a picture of Marina Bay in Singapore, taken just before or while I did the Emacs tutorial:

As a life-long user of keyboard shortcuts, I appreciated Emacs' notation for key bindings. The tutorial begins with basic cursor controls; easy enough, but nothing exciting. I could see the line, sentence, and buffer equivalents being useful. But my jaw dropped when it taught me C-t, transpose-chars. I knew that mastering just that command would probably save me a lot of pain and typos, and that was probably the least of what Emacs could do.

While the Emacs tutorial probably isn't meant to take more than an hour or two on the first try, it took me probably twice as long. This was because I made Anki cards. Some people say that Emacs' learning curve is steep. For myself, I've found Emacs' learning curve to be gentle, but (pleasantly) endless. Then again, it was probably making Anki cards that made the learning curve less steep for me.

A variation of this card was probably one of my first Emacs cards:

Cloze deletion cards like this one are very convenient for this kind of thing. One reason is that it's easy to add new, related information later:

In February, the battery on my little Arch laptop died. I spent most of the month without a laptop, waiting for a new battery to arrive by mail. I borrowed my friend and travel partner's machine to review my Anki cards. Still, I did manage to have a little time to develop my Emacs-fu.

In Hanoi, the hotel room we stayed in had a (virus-ridden) Windows machine. What else could I do but install Emacs? At this time, my Emacs configuration was not that complex, so it wasn't worth adapting. As it turned out, that time with a bare Emacs proved especially helpful for mastering some of the key Emacs concepts that I hadn't quite made use of yet; in particular, I got the hang of using multiple buffers in one session. By the end of February, my new battery had arrived, and I was back in the Emacs saddle.

In March, I converted my Emacs configuration to an Org-literate format, and started tracking it with Git. This was really the first project that I used Git for, and it gave me an opportunity to learn Git.

The other highlight of March was starting to learn a little Elisp. I found Robert Chassell's "An Introduction to Programming in Emacs Lisp" especially helpful. Chassell's overview and Org-mode's fantastic documentation helped me to write my first significant piece of Elisp (some org-capture templates).

April started off with a bang when my Emacs configuration was featured in a major motion picture, Tron Legacy. But that wasn't nearly as exciting as making my first major mode, tid-mode.

In late June, Artur Malabarba launched his Emacs blog, Endless Parentheses. One of his early posts was about hungry-delete. I was excited about it, but found that it did not work with transient-mark-mode. Artur encouraged me to write a fix. I was very excited when Nathaniel Flath, the maintainer, agreed to merge my patches into hungry-delete. At about the same time, Artur posted an adapted version of my narrow-or-widen-dwim function to Endless Parentheses. Sacha Chua, Nathaniel Flath, and some other Emacs Lisp hackers also offered suggestions.

At the end of July, I started learning Clojure, and added an initial CIDER configuration. While reading the Clojure Cookbook, I decided to hack out a function that turned the pages. Ryan Neufeld, one of the co-authors, merged my pull request, and a while later, I saw my function mentioned on Planet Clojure.

In October, I purchased a Mac for a Clojure contracting job. (Knowing Emacs helped me get the job!) Adding the :ensure keyword to my use-package declarations made the Emacs configuration portion of my OS X set-up process near-instant and pain-free. It was a really cool feeling to be using the same Emacs configuration on two different machines. By the end of the month, both were running on the newly-released Emacs 24.4.

Once Emacs 24.4 was out, Artur posted an Emacs 25 wishlist to Endless Parentheses. I responded with this tweet:

This tweet indirectly led to the happy occasion of the Emacs Hangouts.

That just about brings us to this post. In retrospect, I can't believe it's been less than a year, or that I've learned so much. Thanks to everyone who has helped me so far on the endless path of mastering Emacs, especially Ben, Eric, Sacha Chua, Artur Malabarba, and Bozhidar Batsov.

I want to close this post out with Sacha's tweet about 2015:

Like Sacha, I want to learn how to write and run tests in Emacs. I want to learn more about org-mode's features and possible workflows. More broadly, I'd like to make more posts about Emacs, and find other ways to contribute to the Emacs community. I'd like to learn more about Emacs core development. In terms of contrib, I'd like to help accelerate the release of Magit's "next" branch. Above all, I'm sure I'll keep tweaking my configuration.

### QuantOverflow

#### Transforming log return volatility into standard return volatility

If I have a forecasted volatility of the log returns of say, 0.03, this is obviously transformed relative to the log I took of the returns. It strikes me that I should raise e to the power of the volatility I forecasted to in order to get back something "normal" looking. When I do this I get ~1.03, which seems really, really high. This would tell me I'm doing something wrong.

What is the right way to transform log return volatility back into non-logged volatility?

Thank you!

### StackOverflow

#### OpenCv - Assertion failed in svm train

I can't find out why this is happening. I have 5 classes and made labels and trainData, but it gives an assertion failure. Why? Labels are 1x1x884 (I tried 1x884x1 also, it is the same), and fmat is 1 x 884 x 680400, all float32. The error is:

and code:

HOGDescriptor hog;
vector<vector< float>> features;
int labels[884] = {};

int brojac = 0;
cv::String path1("C:/Users/Kristina/Desktop/Kompjuterska vizija/PrviProjekat/PrviProjekat/Chihuahua/*.jpg"); //select only jpg
vector<cv::String> fn;
vector<cv::Mat> data;
cv::glob(path1, fn, true); // recurse
for (size_t k = 0; k<fn.size(); ++k)
{
if (im.empty()) continue; //only proceed if sucsessful
// you probably want to do some preprocessing
cvtColor(im, im, CV_RGB2GRAY);
resize(im, im, Size(200, 200));//resize image
vector< float> descriptorsValues;
hog.compute(im, descriptorsValues);
features.push_back(descriptorsValues);
labels[brojac] = 1;
brojac++;
data.push_back(im);

}

cv::String path2("C:/Users/Kristina/Desktop/Kompjuterska vizija/PrviProjekat/PrviProjekat/ShihTzu/*.jpg"); //select only jpg
vector<cv::String> fn2;
vector<cv::Mat> data2;
cv::glob(path2, fn2, true); // recurse
for (size_t k = 0; k<fn2.size(); ++k)
{
vector< float> descriptorsValues;
if (im.empty()) continue; //only proceed if sucsessful
// you probably want to do some preprocessing
cvtColor(im, im, CV_RGB2GRAY);
resize(im, im, Size(200, 200));//resize image
hog.compute(im, descriptorsValues);
features.push_back(descriptorsValues);
labels[brojac] = 2;
brojac++;
data.push_back(im);
}
cv::String path3("C:/Users/Kristina/Desktop/Kompjuterska vizija/PrviProjekat/PrviProjekat/Yorkshireterrier/*.jpg"); //select only jpg
vector<cv::String> fn3;
vector<cv::Mat> data3;
cv::glob(path3, fn3, true); // recurse
for (size_t k = 0; k<fn3.size(); ++k)
{
vector< float> descriptorsValues;
if (im.empty()) continue; //only proceed if sucsessful
// you probably want to do some preprocessing
cvtColor(im, im, CV_RGB2GRAY);
resize(im, im, Size(200, 200));//resize image
hog.compute(im, descriptorsValues);
features.push_back(descriptorsValues);
labels[brojac] = 3;
brojac++;
data.push_back(im);
}
cv::String path4("C:/Users/Kristina/Desktop/Kompjuterska vizija/PrviProjekat/PrviProjekat/vizsla/*.jpg"); //select only jpg
vector<cv::String> fn4;
vector<cv::Mat> data4;
cv::glob(path4, fn4, true); // recurse
for (size_t k = 0; k<fn4.size(); ++k)
{
vector< float> descriptorsValues;
if (im.empty()) continue; //only proceed if sucsessful
// you probably want to do some preprocessing
cvtColor(im, im, CV_RGB2GRAY);
resize(im, im, Size(200, 200));//resize image
hog.compute(im, descriptorsValues);
features.push_back(descriptorsValues);
labels[brojac] = 4;
brojac++;
data.push_back(im);
}
cv::String path5("C:/Users/Kristina/Desktop/Kompjuterska vizija/PrviProjekat/PrviProjekat/pug/*.jpg"); //select only jpg
vector<cv::String> fn5;
vector<cv::Mat> data5;
cv::glob(path5, fn5, true); // recurse
for (size_t k = 0; k<fn5.size(); ++k)
{
vector< float> descriptorsValues;
if (im.empty()) continue; //only proceed if sucsessful
// you probably want to do some preprocessing
cvtColor(im, im, CV_RGB2GRAY);
resize(im, im, Size(200, 200));//resize image
hog.compute(im, descriptorsValues);
features.push_back(descriptorsValues);
labels[brojac] = 5;
brojac++;
data.push_back(im);
}

Mat labelsMat(884, 1, CV_32FC1, labels);
Mat fmat(features[0].size(), features.size(), CV_32FC1, features.data());

Ptr<SVM> svm = SVM::create();
svm->setType(SVM::C_SVC);
svm->setKernel(SVM::LINEAR);
svm->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER, 100, 1e-6));
svm->train(fmat, ml::SampleTypes::ROW_SAMPLE, labelsMat);

#### WEKA - ClassNotFoundException on Android

I've built a MultiClassClassifier with the option SMO in Weka GUI and used a MultilayerPerceptron as the calibration method. I've tested the model from Eclipse (Java) on multiple samples and it works really well on my PC. When I try and use the same classifier on Android I get an error that is caused by:

(&#92; time _ → () &lt;$tick time) https://hackage.haskell.org/package/gloss-1.10.1.1/docs/src/Graphics-Gloss-Interface-IO-Game.html playIO display backColor simResolution worldStart worldToPicture worldHandleEvent worldAdvance = playWithBackendIO defaultBackendState display backColor simResolution worldStart worldToPicture worldHandleEvent worldAdvance False https://hackage.haskell.org/package/gloss-1.10.1.1/docs/src/Graphics-Gloss-Internals-Interface-Game.html , Callback.Idle (callback_simulate_idle stateSR animateSR (readIORef viewSR) worldSR (&#92;_ -&gt; worldAdvance) singleStepTime) I don't get it. The code presented as a "proof of the concept" is nothing to do with the "true FRP principle" that he claims, I think. I am not a user of ReactiveBanana, so I'd like to double-check by people here. Thanks. # EDIT: I will post an answer by Cornal Elliot who is widely known known for the frontier of FRP. I discussed him on this issue by email for a month, and he generously allowed me to share his remarks here. Therefore, I decided not to accept the answers by students(according to their user profile, and somehow also new and chosen to answer my Q as the first one), instead, I think it's much better for our sake or constructive manner to introduce an answer by a FRP expert. Many thanks to him. Here are my Questions on this issue and answers by Cornal Elliot: ## 1 Reactive-banana "FRP" implementation After some conversations with the author a few years back, I haven't tracked reactive-banana closely. I think he was at least trying (which is better than most), but I don't know how well he did. ## 2 "true FRP basic principle" by some guy above That guy's description is not clear enough to for me to tell. If TimeStamp is another name for R (real numbers), then he might be close. However, I'm guessing that he means some kind of discrete system clock. Moreover, his remarks about applying f to 0, 1, 2, ... seems rather misguided. 
So, I have a feeling he also has some doubt on the "true FRP basic principle" claimed by some guy http://anond.hatelabo.jp/20160517023637 & http://anond.hatelabo.jp/20160518122300 Here, I also have to quote the FRP perspective by Cornal Elliot. ## two fundamental principles of FRP: (a) having a precise (and simple) denotation and (b) continuous time (behaviors as functions of real-valued time). There are a lot of ideas floating around about what FRP is. For me it's always been two things: (a) denotative and (b) temporally continuous. Many folks drop both of these properties and identify FRP with various implementation notions, all of which are beside the point in my perspective. To reduce confusion, I would like to see the term "functional reactive programming" replaced by the more accurate & descriptive "denotative, continuous-time programming" (DCTP), as suggested by Jake McArthur in a conversation last year. Here are some of his outstanding talk on FRP in youtube that he also shared with me. Lambda Jam 2015 - Conal Elliott - The Essence and Origins of Functional Reactive Programming Lambda Jam 2015 - Conal Elliott - A More Elegant Specification for FRP During the conversation in email and watching his talk, I think he values "continuous time" behavior, and feels the "some guy"'s perspective sounds like "discrete" that I strongly agree with him. He also answered a question : What is (functional) reactive programming? I personally organize his (a)&(b) down to his another answer above: 1. Dynamic/evolving values (i.e., values "over time") are first class values in themselves. Called these things "behaviors" which must be continuous ("point-wise") for functional composition. <-- (b) 2. Another type (family) of "events", each of which has a stream (finite or infinite) of occurrences. Each occurrence has an associated time and value. To account for discrete phenomena. 3. 
Having a precise denotation/specification (and more detail explanation) <-- (a) ### QuantOverflow #### How to perform risk budgeting for non-linear portfolios? I am using this question to compute optimal weights following a risk budgeting approach. The problem is I am using non-linear portfolios (options,equity,fixed income,fx). What I am looking for is that each asset class contributes the same amount of risk to the portfolio, and I am sure I can't use the regular approach if I have derivatives in my portfolio. ### Fefe #### Abgesehen von dem Whisky will man die Schotten auch ... Abgesehen von dem Whisky will man die Schotten auch wegen ihrer Kreativität in der EU haben, finde ich. (NSFW) ### infra-talk #### Three Reasons Ember.js Closure Actions Rule Closure actions were introduced in Ember 1.13. They are a huge improvement over the previous action infrastructure. In this post, I’ll highlight some of the things that make closure actions so awesome. ## 1. Improved Syntax The old style of action handling was not elegant. Here’s how passing action handlers used to look: <!-- /templates/components/parent-component.hbs --> {{old-component someAction='handleAction'}} // /components/old-component.js click() { this.sendAction('someAction'); } The parent template passed old-component the someAction property. Then, when old-component sent the action named someAction (via a string), parent-component’s action handler was invoked. The fact that someAction was hooked up to an action in the parent was completely implicit. The new syntax makes action-passing much more elegant. Here is how we do the same thing now: <!-- templates/components/parent-component.hbs --> {{new-component someAction=(action 'handleAction')}} // components/new-component.js click() { this.attrs.someAction(); } It’s obvious that when someAction is passed into new-component, it’s an action. When we want to trigger the action, we can just call the function. 
Another added benefit is that a JavaScript error will be thrown on page load if the component does not define the action hooked up to the template. Before, the error wouldn’t surface until the action was actually attempted. ## 2. Easier Data Propagation One of Ember’s core concepts is “data down, actions up” (DDAU). Put simply, data should flow down through an application, and actions (e.g., clicking, typing, etc.) should be responsible for bubbling information back up. Imagine a scenario where we are displaying a list of superheroes for different comic book publishers. We have a couple of related components. At the very bottom, we have a simple component responsible for displaying and selecting a superhero. When we select a superhero, we need to do some stuff in the application controller, which is at the very top of our application. Before closure actions existed, we had to bubble that action up through each layer of the application until we reached the top. Every component defined an action that simply sent another action. In other words, it was a lot of boilerplate code. Closure actions help us avoid most of that boilerplate code. We avoid it by accessing the special attrs property of the component. This object contains all of the attributes passed into a component, including closure actions. Let’s map out our scenario from the top down. 
<!-- /templates/application.hbs --> {{comic-publishers publishers=publishers selectSuperhero=(action 'selectSuperhero')}} <!-- /templates/components/comic-publishers.hbs --> {{#each publishers as |pub|}} <h2>{{pub.name}}</h2> {{superhero-list superheroes=pub.superheroes selectSuperhero=attrs.selectSuperhero}} {{/each}} <!-- /templates/components/superhero-list.hbs --> {{#each superheroes as |hero|}} {{superhero-list-item hero=hero select=attrs.selectSuperhero}} {{/each}} // /components/superhero-list-item.js click() { this.attrs.select(this.get('hero')); } Because we are leveraging the attrs property, we don’t have to define the boilerplate action handler in superhero-list.js or in comic-publishers.js, and our selection still bubbles all the way up to the application controller. ## 3. Added Action Currying Another awesome feature of closure actions is that they support currying. At any level in the chain of components, I can tack additional parameters onto an action. Revisiting our scenario from above, let’s say the application controller also needs to know which publisher the selected superhero belongs to. That’s easy; we can curry the action in the comic-publishers.hbs template to append the publisher and send that action up to our controller. Let’s check it out! <!-- /templates/components/comic-publishers.hbs --> {{#each publishers as |pub|}} <h2>{{pub.name}}</h2> {{superhero-list superheroes=pub.superheroes selectSuperhero=(action 'selectSuperhero' pub)}} {{/each}} // /components/comic-publishers.js actions: { selectSuperhero(publisher, hero) { this.attrs.selectSuperhero(publisher, hero); } } Now, whenever the selection bubbles up, the publisher is added to the parameters. ## See the Code All of the code from this post can be found here. I would love to know what other wins you’ve seen from using Ember’s closure actions. The post Three Reasons Ember.js Closure Actions Rule appeared first on Atomic Spin. 
### Related Posts ### StackOverflow #### Machine Learning [on hold] I want to go with machine learning.As a beginner what should I need to know about any pre things and maths? I have programming experience and I want to learn by myself. Some advice and links will be helpful for me to start machine learning. Thank you. ### Lobsters #### experience, thoughts and best practices for pdf server side? I am curious to know what are the general opinions, directions and best practices about server side pdf generation. I want to break out my app’s client-side pdf export feat, and I was looking to spin up a service that renders a json payload to pdf. There’s a language/stack you suggest or that you have found particularly fit for this task? • node/pdfkit? • PHP? • clojure/pdfbox? • pandoc? I am even planning to do a spike and test using handlebars to render latex and then tex2pdf, but perhaps this is a good scenario to use amazon lambda.. What you think? #### My God, it's full of yaks! I got “hello world” to work, why can’t you get “hello world” to work? Reminded me of https://lobste.rs/s/ys558t/sad_state_web_app_deployment. Comments #### "Static vs. Dynamic" Is the Wrong Question #### Understanding the SAFE Consensus ### Planet Emacsen #### Grant Rettke: Making the Repl sing electric ### DragonFly BSD Digest #### In Other BSDs for 2016/06/25 A good amount of user group material this week. ### UnixOverflow #### Installed ImageMagick7-7.0.1.5 on FreeBSD 10.3 but there is no import command available after installation After running$ sudo pkg install ImageMagick7-7.0.1.5 on FreeBSD 10.3, noticed that what I needed is not installed: import

Am I missing something?

### StackOverflow

#### OpenCv SVM and Hog routine for classification objects

I made 5 folders with 5 different types of dogs, extracted HOG features, made labels, and I wanted to do svm train, but it gives me a cv::Exception at memory location. What did I do wrong? After that, I wanted to make a folder with test images and use svm predict. Is it a problem with the conversion from vector to Mat file?

vector<vector< float>> features;
vector<int> labels;

cv::String path1("C:/Users/Kristina/Desktop/Kompjuterska vizija/PrviProjekat/PrviProjekat/Chihuahua/*.jpg"); //select only jpg
vector<cv::String> fn;
vector<cv::Mat> data;
cv::glob(path1, fn, true); // recurse
for (size_t k = 0; k<fn.size(); ++k)
{
if (im.empty()) continue; //only proceed if sucsessful
// you probably want to do some preprocessing
cvtColor(im, im, CV_RGB2GRAY);
resize(im, im, Size(200, 200));//resize image
vector< float> descriptorsValues;
hog.compute(im, descriptorsValues);
features.push_back(descriptorsValues);
labels.push_back(1);
data.push_back(im);
}

cv::String path2("C:/Users/Kristina/Desktop/Kompjuterska vizija/PrviProjekat/PrviProjekat/ShihTzu/*.jpg"); //select only jpg
vector<cv::String> fn2;
vector<cv::Mat> data2;
cv::glob(path2, fn2, true); // recurse
for (size_t k = 0; k<fn2.size(); ++k)
{
vector< float> descriptorsValues;
if (im.empty()) continue; //only proceed if sucsessful
// you probably want to do some preprocessing
cvtColor(im, im, CV_RGB2GRAY);
resize(im, im, Size(200, 200));//resize image
hog.compute(im, descriptorsValues);
features.push_back(descriptorsValues);
labels.push_back(2);
data.push_back(im);
}
cv::String path3("C:/Users/Kristina/Desktop/Kompjuterska vizija/PrviProjekat/PrviProjekat/Yorkshireterrier/*.jpg"); //select only jpg
vector<cv::String> fn3;
vector<cv::Mat> data3;
cv::glob(path3, fn3, true); // recurse
for (size_t k = 0; k<fn3.size(); ++k)
{
vector< float> descriptorsValues;
if (im.empty()) continue; //only proceed if sucsessful
// you probably want to do some preprocessing
cvtColor(im, im, CV_RGB2GRAY);
resize(im, im, Size(200, 200));//resize image
hog.compute(im, descriptorsValues);
features.push_back(descriptorsValues);
labels.push_back(3);
data.push_back(im);
}
cv::String path4("C:/Users/Kristina/Desktop/Kompjuterska vizija/PrviProjekat/PrviProjekat/vizsla/*.jpg"); //select only jpg
vector<cv::String> fn4;
vector<cv::Mat> data4;
cv::glob(path4, fn4, true); // recurse
for (size_t k = 0; k<fn4.size(); ++k)
{
vector< float> descriptorsValues;
if (im.empty()) continue; //only proceed if sucsessful
// you probably want to do some preprocessing
cvtColor(im, im, CV_RGB2GRAY);
resize(im, im, Size(200, 200));//resize image
hog.compute(im, descriptorsValues);
features.push_back(descriptorsValues);
labels.push_back(4);
data.push_back(im);
}
cv::String path5("C:/Users/Kristina/Desktop/Kompjuterska vizija/PrviProjekat/PrviProjekat/pug/*.jpg"); //select only jpg
vector<cv::String> fn5;
vector<cv::Mat> data5;
cv::glob(path5, fn5, true); // recurse
for (size_t k = 0; k<fn5.size(); ++k)
{
vector< float> descriptorsValues;
if (im.empty()) continue; //only proceed if sucsessful
// you probably want to do some preprocessing
cvtColor(im, im, CV_RGB2GRAY);
resize(im, im, Size(200, 200));//resize image
hog.compute(im, descriptorsValues);
features.push_back(descriptorsValues);
labels.push_back(5);
data.push_back(im);
}

Mat M2 = Mat(features[0].size(),884, CV_32FC1);
memcpy(M2.data, features.data(), features.size()*sizeof(float));

Mat l = Mat(1,features[0].size(), CV_32FC1);
memcpy(l.data, labels.data(), labels.size()*sizeof(float));

Ptr<ml::SVM> svm = ml::SVM::create();
svm->setType(ml::SVM::C_SVC);
svm->setKernel(ml::SVM::POLY);

svm->train(M2, ml::ROW_SAMPLE, l);

### QuantOverflow

#### Efficiently storing real-time intraday data in an application agnostic way

What would be the best approach to handle real-time intraday data storage?

For personal research I've always imported from flat files only into memory (historical EOD), so I don't have much experience with this. I'm currently working on a side project, which would require daily stock quotes updated every minute from an external feed. For the time being, I suppose any popular database solution should handle it without sweating too much in this scenario. But I would like the adopted solution to scale easily when real-time ticks become necessary.

A similar problem has been mentioned by Marko, though it was mostly specific to R. I'm looking for a universal data storage accessible both for lightweight web front-ends (PHP/Ruby/Flex) and analytical back-end (C++, R or Python, don't know yet).

From what chrisaycock mentioned column oriented databases should be the most viable solution. And it seems to be the case.

But I'm not sure I understand all the intricacies of column oriented storage in some exemplary usage scenarios:

• Fetching all or subset of price data for a specific ticker for front-end charting
• Compared to row based solutions fetching price data should be faster because it's a sequential read. But how does storing multiple tickers in one place influence this? For example a statement like "select all timestamps and price data where ticker is equal to something". Don't I have to compare the ticker on every row I fetch? And in the situation where I have to provide complete data for some front-end application, wouldn't serving a raw flat file for the instrument requested be more efficient?
• Analytics performed in the back-end
• Things like computing single values for a stock (e.g. variance, return for last x days) and dependent time-series (daily returns, technical indicators etc.). Fetching input data for computations should be more efficient as in the preceding case, but what about writing? The gain I see is bulk writing the final result (like value of computed indicator for every timestamp), but still I don't know how the database handles my mashup of different tickers in one table. Does horizontal partitioning/sharding handle it for me automatically or am I better splitting manually into table per instrument structure (which seems unnecessary cumbersome)?
• Updating the database with new incoming ticks
• Using row based orientation would be more efficient here, wouldn't it? And the same goes about updating aggregated data (for example daily OHLC tables). Won't it be a possible bottleneck?

All this is in the context of available open source solutions. I thought initially about InfiniDB or HBase, but I've seen MonetDB and InfoBright being mentioned around here too. I don't really need "production quality" (at least not yet) as mentioned by chrisaycock in the referenced question, so would any of this be a better choice than the others?

And the last issue - from approximately which load point are specialized time-series databases necessary? Unfortunately, things like kdb+ or FAME are out of scope in this case, so I'm contemplating how much can be done on commodity hardware with standard relational databases (MySQL/PostgreSQL) or key-value stores (like Tokyo/Kyoto Cabinet's B+ tree) - is it a dead end really? Should I just stick with some of the aforementioned column oriented solutions owing to the fact that my application is not mission critical or is even that an unnecessary precaution?

Thanks in advance for your input on this. If some part is too convoluted, let me know in a comment. I will try to amend accordingly.

EDIT:

It seems that strictly speaking HBase is not a column oriented store but rather a sparse, distributed, persistent multidimensional sorted map, so I've crossed it out from the original question.

After some research I'm mostly inclined towards InfiniDB. It has all the features I need, supports SQL (standard MySQL connectors/wrappers can be used for access) and full DML subset. The only thing missing in the open source edition is on the fly compression and scaling out to clusters. But I guess it's still a good bang for the buck, considering it's free.

### StackOverflow

#### Which algorithms are used for human activity recognition from images? [on hold]

I'm wondering which algorithms are best for human activity recognition from images and why? I should perform image classification based on these activities

#### How to achieve strong consistency in MongoDB Replica Sets?

In MongoDB documentation, here, it has been mentioned that in a replica set even with majority readConcern we would achieve eventual consistency. I am wondering how is this possible when we have majority in both reads and writes which leads to a quorum (R+W>N) in our distributed system? I expect a strong consistent system in this setting. This is the technique which Cassandra uses as well in order to achieve strong consistency.

Can someone clarify this for me please?

### Fred Wilson

#### Video Of The Week: Made In America

The Gotham Gal and I are making our way through the OJ Simpson documentary, Made In America. It’s a fascinating tale that weaves OJ’s story with the story of race relations in LA from the 60s to the 90s. It is very well done.

Here’s a podcast where my friends John and Will discuss it at length and talk to the director, Ezra Edelman.

### CompsciOverflow

#### Is TD-learning considered a model-based algorithm?

Differently from Sarsa and Q-learning, pure temporal difference learning (TD-learning) works with state value functions $V(s)$ and not state-action Q value functions $Q(s,a)$. It means that, in order to select the best action at state $s$, the resulting state $s'$ for every possible action must be computed, so we can get $V(s')$. Thus, we need a model for computing $s'$ from a $(s,a)$ pair. Is that correct? Does that mean that TD-learning can be considered a model-based technique? Or in the case of TD-learning we consider it just a value updating algorithm and ignore the control part, thus removing the need for a model?

#### reinforcement learning in gridworld with subgoals

Andrew Ng, Daishi Harada, Stuart Russell published a conference paper entitled Policy Invariance Under Reward Transformations: Theory and Application to Reward Shaping.

There is a specific example there that I am extremely curious/interested about. It is in Figure 2(a) of the paper:

It is about a 5x5 gridworld with start and goal states in opposite corners. The catch is, the agent must learn to go from start to end BY VISITING specific subgoals 1,2,3,4 IN ORDER.

Has anyone seen/understood the code for this? I want to know how the reward function/shaping is given in this kind of problem.

I am interested to know how the flow of this modification to the grid world is written.

### StackOverflow

#### One class SVM to detect outliers

My problem is

I want to build a one class SVM classifier to identify the nouns/aspects from test file. The training file has list of nouns. The test has list of words.

This is what I've done:

I'm using Weka GUI and I've trained a one class SVM(libSVM) to get a model.

Now the model classifies those words in test file that the classifier identified as nouns in the generated model. Others are classified as outliers. ( So it is just working like a look up. If it is identified as noun in trained model, then 'yes' else 'no')

So how do I build a proper classifier? (I mean: what format should the input be in, and what information should it contain?)

Note:

• I don't give negative examples in training file since it is one class.
• My input format is arff
• Format of training file is a set of word,yes
• Format of test file is a set of word,?

EDIT My test file will have noun phrases. So my classifier's job is to get the nouns words from candidates in test file.

### QuantOverflow

#### Open source equity/bond index data

I have been using the tseries package of R (get.hist.quote) to get historical quotes for various indices from yahoo finance. I am interested in DAX, VDAX, EB.REXX and DJ UBS Commodity Index. When I tried to expand the time window for my analyses I saw that all time series except DAX and VDAX are discontinued.

My questions:

1) Do you know why EB.REXX (the symbol was REX.DE) disappeared on yahoo finance (I now use EB.REXX 10 years, REX0.DE, but it is also discontinued) and why I cannot find DJ UBS Cdty Index (symbol: ^DJUBS) anymore?

I use code like

require(tseries)

but both times series end in the 2nd half of 2012.

2) Do you know any R-compatible open data source where I can get

1. a price or performance index for German or core-EURO government bonds (like eb.rexx)
2. a price or performance index for broad commodities (like DJ UBS Cdty Index)?

EDIT: I started to try getSymbols of the quantmod package.

1. In google finance I found INDEXDB:RXPG for EB.REXX and INDEXDJX:DJUBS for DJ UBS - are these the correct indices? Where do I find any description of the data?
2. The example taken from the manual - getSymbols("MSFT",src="google") - works, but what I would need for the index data - getSymbols("INDEXDB:RXPG",src="google") - does not ...

### StackOverflow

#### How can I get the value of the error during training in Tensorflow?

In the TensorFlow MNIST beginners tutorial, code excerpts here:

cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
sess = tf.Session()
sess.run(init)

#-----training loop starts here-----
for i in range(1000):
batch_xs, batch_ys = mnist.train.next_batch(100)
sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

Is it possible to access/retrieve the values of the cross_entropy error, Weights, and biases while inside the loop? I want to plot the error, and possibly a histogram of the weights.

Thanks!

#### Functional Programming in C++

Can someone guide me how do functional programming in C++? Is there some good online material that I can refer?

Please note that I know about the library FC++. I want to know how to do that with C++ standard library alone.

Thanks.

### Planet Theory

#### Open problems from SWAT

I'm now making my way back from Iceland, where I attended the 15th Scandinavian Symposium and Workshops on Algorithm Theory (SWAT 2016). Rather than providing a run-down of the talks and results I found interesting (you can choose your own from the conference proceedings, this year provided as open-access through LIPIcs), I thought it might be more fun to mention a few open problems that people mentioned along the way and that caught my attention.

The first invited talk, by Julia Chuzhoy, concerned the relation between grid minor size and treewidth. There exist graphs of treewidth t whose biggest grid minor has side length O(sqrt t/log t), and the results she spoke about give polynomial lower bounds, but with much smaller exponents (currently around 1/19). As her abstract states, "an important open question is establishing tight bounds" for this problem.

From Zachary Friggstad's talk on integrality gaps for directed Steiner trees, we have the situation that (for directed Steiner tree problems with t terminals) it is possible to approximate the solution within a tε factor, for any ε > 0, in polynomial time, or to within a polylog factor in quasipolynomial time. Is a polynomial-time polylog-factor approximation possible?

Daniël Paulusma spoke on finding square roots of k-apex graphs; that is, one is given a graph G that could be made planar by the deletion of k vertices, and the goal is to find another graph R on the same vertex set such that two vertices are adjacent in G if and only if their distance in R is at most two. There are a lot of other graph classes for which the problem is known to be polynomial or hard, but he listed as open finding square roots of split graphs or cographs, and finding graph square roots that are planar.

Jens Schmidt's talk was motivated by some older work of Mikkel Thorup on recognizing map graphs, the intersection graphs of interior-disjoint simply-connected regions in the plane. These differ from planar graphs because of the four corners phenomenon, where Arizona, Colorado, New Mexico, and Utah all meet at a point (and are all adjacent in the map graph). Thorup gave a polynomial time recognition algorithm but his exponent is huge and his algorithm does not provide a combinatorial certificate of being a map graph (a planar bipartite graph whose half-square is the given graph). Schmidt gave a more satisfactory solution to a special case but would like a better algorithm for the general problem.

My own talk concerned cuckoo filters, a more space-efficient replacement for Bloom filters. A data structure of Pagh et al provides a theoretical but not-yet-practical solution for the same problem (for all false positive rates), but cuckoo filters only work well when the false positive rate is small enough. What about if we want a practically efficient filter that gives a high false positive rate, say 20%? Can we get close to optimal space in this case?

Konrad Dabrowski made progress on classifying which sets of forbidden induced subgraphs lead to graph families with bounded clique-width by showing that (diamond,P1+2P2)-free graphs and four other similarly defined classes do have bounded clique-width. But as he reports, there are many remaining open cases.

Dana Richards spoke on a problem of comparison-sorting a set of items when only certain pairs of items are allowed to be compared, but his paper has more questions on this problem than answers. Any set of comparisons that you make (a subset of allowed ones) gives you a DAG with an edge for each comparison, oriented from smaller to larger, and you can infer the results of all comparisons in the transitive closure of this DAG. The goal is to either test or infer the results of all allowed comparisons. You can do it in a sublinear number of comparisons when either the set of allowed comparisons is small (by just doing them all) or its complement is small (by Richards' algorithm) but what he would like to prove is a subquadratic (deterministic) bound that holds in all cases.

Timothy Chan revisited one of the first kinetic data structures, for closest pairs (and/or nearest neighbors). The static version of the problem can be solved in O(n log n) time in any constant dimension, but previous kinetic structures had a factor of logdimension in their time bounds. Chan eliminated this, but at the expense of making the time bound depend on the geometry of the input (the range of values that the closest pair might span) instead of just on the number of points. He asked whether it is possible to eliminate both the dependence on dimension and the dependence on geometry.

### QuantOverflow

#### Non-contractual accounts behavioural study

I need to carry out a non-contractual accounts behavioural study for a bank. The objective is to estimate core/non core ratios and then bucket and ftp them. Any recipe where to start? I have 3yrs of historical data, daily closing balances. From what I googled I understand that I need some kind of seasonal vs growth trend segregation. But only guidelines, nothing in particular. Visually represented my data has (e.g. current accounts) very heavy seasonal bias with highs in shoulder seasons and lows in the festive seasons ;)). How to isolate it? How do I then calculate the true core/volatile ratio?

### StackOverflow

#### Export machine learning model

I am creating a machine learning algorithm and want to export it. Suppose i am using scikit learn library and Random Forest algorithm.

modelC=RandomForestClassifier(n_estimators=30)
m=modelC.fit(trainvec,yvec)

modelC.model

How can i export it or is there a any function for it ?

### StackOverflow

#### Which Machine learning approach should I use to find similarity between two words based on their meaning? [on hold]

I have been developing an application that involves finding the similarity between two words based on their meaning in some context. For example, 'Professor' and 'Lecturer' mean a similar thing in the context of a university. Which machine learning approach should I use for this problem?

### StackOverflow

#### Plotting classification decision boundary line based on perceptron coefficients

This is practically a repeat of this question. However, I want to ask a very specific question regarding plotting of the decision boundary line based on the perceptron coefficients I got with a rudimentary "manual" coding experiment. As you can see the coefficients extracted from a logistic regression result in a nice decision boundary line:

based on the glm() results:

(Intercept)       test1       test2
1.718449    4.012903    3.743903

The coefficients on the perceptron experiment are radically different:

bias     test1     test2
9.131054 19.095881 20.736352

To facilitate an answer, here is the data, and here is the code:

# DATA PRE-PROCESSING:
dat[,1:2] = apply(dat[,1:2], MARGIN = 2, FUN = function(x) scale(x)) # scaling the data
data = data.frame(rep(1,nrow(dat)), dat) # introducing the "bias" column
colnames(data) = c("bias","test1","test2","y")
data$y[data$y==0] = -1 # Turning 0/1 dependent variable into -1/1.
data = as.matrix(data) # Turning data.frame into matrix to avoid mmult problems.

# PERCEPTRON:
set.seed(62416)
no.iter = 1000                           # Number of loops
theta = rnorm(ncol(data) - 1)            # Starting a random vector of coefficients.
theta = theta/sqrt(sum(theta^2))         # Normalizing the vector.
h = theta %*% t(data[,1:3])              # Performing the first f(theta^T X)

for (i in 1:no.iter){                    # We will recalculate 1,000 times
for (j in 1:nrow(data)){               # Each time we go through each example.
if(h[j] * data[j, 4] < 0){         # If the hypothesis disagrees with the sign of y,
theta = theta + (sign(data[j,4]) * data[j, 1:3]) # We + or - the example from theta.
}
else
theta = theta                      # Else we let it be.
}
h = theta %*% t(data[,1:3])            # Calculating h() after iteration.
}
theta                                    # Final coefficients
mean(sign(h) == data[,4])                # Accuracy

QUESTION: How to plot the boundary line (as I did above using the logistic regression coefficients) if we only have the perceptron coefficients?

### CompsciOverflow

#### How to connect the math of recurrence relations to daily programming concepts

What exactly are we doing from a CS perspective when we solve a recurrence relation and find a resulting formula for a sequence given a set of initial conditions? I just went through the "linear homogeneous recurrence relations of degree k with constant coefficients" bit in discrete math and basically understand the math part and have a simple process for solving them mechanically.

What I haven't seen yet is an explanation of what this correlates to in a CS sense. I understand we will encounter recurrence relations in algorithms which we haven't reached yet (next class) but I'm wondering what exactly do the initial conditions and the sequence represent?

For a trivial programming example, if we were to write a recursive function to process a directory and all its subdirectories, I understand conceptually that could be modeled as a recurrence relation because it is recursive, but I don't know how, and I don't know what the initial conditions and final formula for the sequence would represent in such a scenario.

Here's an example of the type of relation I'm talking about. So we solve these by taking the relation down into its characteristic equation, finding the roots, and then building a system of equations from the initial conditions $a_0, ..., a_j$ that we solve to find the constants. Finding the constants gives a closed formula for that particular sequence defined by those initial conditions.

My question is, from a CS/programming/software engineering perspective what would we model using recurrence relations like this other than algorithms, and what would the initial conditions represent in those models?

### QuantOverflow

#### Market microstructure by Mark B. Garman (J. Financial Economics 3, 257-275, 1976)

In Garman's inventory model, buying order and selling order are poisson process with order size = 1. Buying price and selling price are denoted by pb and ps, that is, the market maker gets pb when she sells a stock to the others, and spends ps to buy a stock from the others.

Garman than calculates the probability of the inventory of the market maker, says Q(k, t+dt) = probability to get 1 dollar x Q(k-1, t) + probability to lose 1 dollar x Q(k+1, t) + no buying or selling order x Q(k, t), where Q(k, t+dt) = probability to have k money at time t+dt.

In the above equation, I think Garman had split the money received and lost by buying or selling a stock into many sub-poisson processes; otherwise, getting 1 dollar or losing 1 dollar would be impossible, as the market maker receives pb dollars and loses ps dollars in each order, not 1 dollar. Is my statement correct? Thank you very much.

### CompsciOverflow

#### can i print a geometric structures using cpp program in my cppdroid android app? [on hold]

I tried to display proper geometric figures but don't have proper code to display them on my Android mobile. I want to display geometric figures like a straight line. I request guidance about the code in my CppDroid Android app. Here I want a C++ program to make this geometric structure. So please help me.

### CompsciOverflow

#### Bootcamp or self-teach? [on hold]

Who out there is experienced a little bit with both and might be able to provide an answer here? I'm finding it very difficult to self-motivate at home and push through the walls, and even though it's a pretty penny for the bootcamps, they seem like they're that way for a reason.

Ha, my fault then. I actually spent a long time trying to conform to the guidelines of SE since I'm new but I guess I missed it still, sorry about that I just couldn't resist asking.

### StackOverflow

#### Manual Perceptron example in R - are the results acceptable?

I am trying to get a perceptron algorithm for classification working but I think something is missing. This is the decision boundary achieved with logistic regression:

The red dots got into college, after performing better on tests 1 and 2.

This is the data, and this is the code for the logistic regression in R:

colnames(dat) = c("test1","test2","y")
plot(test2 ~ test1, col = as.factor(y), pch = 20, data=dat)
fit = glm(y ~ test1 + test2, family = "binomial", data = dat)
coefs = coef(fit)
(x = c(min(dat[,1])-2,  max(dat[,1])+2))
(y = c((-1/coefs[3]) * (coefs[2] * x + coefs[1])))
lines(x, y)

The code for the "manual" implementation of the perceptron is as follows:

# DATA PRE-PROCESSING:
dat[,1:2] = apply(dat[,1:2], MARGIN = 2, FUN = function(x) scale(x)) # scaling the data
data = data.frame(rep(1,nrow(dat)), dat) # introducing the "bias" column
colnames(data) = c("bias","test1","test2","y")
data$y[data$y==0] = -1 # Turning 0/1 dependent variable into -1/1.
data = as.matrix(data) # Turning data.frame into matrix to avoid mmult problems.

# PERCEPTRON:
set.seed(62416)
no.iter = 1000                           # Number of loops
theta = rnorm(ncol(data) - 1)            # Starting a random vector of coefficients.
theta = theta/sqrt(sum(theta^2))         # Normalizing the vector.
h = theta %*% t(data[,1:3])              # Performing the first f(theta^T X)

for (i in 1:no.iter){                    # We will recalculate 1,000 times
for (j in 1:nrow(data)){               # Each time we go through each example.
if(h[j] * data[j, 4] < 0){         # If the hypothesis disagrees with the sign of y,
theta = theta + (sign(data[j,4]) * data[j, 1:3]) # We + or - the example from theta.
}
else
theta = theta                      # Else we let it be.
}
h = theta %*% t(data[,1:3])            # Calculating h() after iteration.
}
theta                                    # Final coefficients
mean(sign(h) == data[,4])                # Accuracy

With this, I get the following coefficients:

bias     test1     test2
9.131054 19.095881 20.736352

and an accuracy of 88%, consistent with that calculated with the glm() logistic regression function: mean(sign(predict(fit))==data[,4]) of 89% - logically, there is no way of linearly classifying all of the points, as is obvious from the plot above. In fact, iterating only 10 times and plotting the accuracy, ~90% is reached after just 1 iteration:

Being in line with the training classification performance of logistic regression, it is likely that the code is not conceptually wrong.

QUESTIONS: Is it OK to get coefficients so different from the logistic regression:

(Intercept)       test1       test2
1.718449    4.012903    3.743903

### Wes Felter

#### Kazuho Oku: Developing the fastest HTTP/2 server

Kazuho Oku: Developing the fastest HTTP/2 server:

(Hint: It’s not Nginx or Go hipsterware.)

#### "Everybody’s OpenStack software is equally bad. It’s also as bad as all the other infrastructure..."

“Everybody’s OpenStack software is equally bad. It’s also as bad as all the other infrastructure software out there – software-defined networking, software-defined storage, cloud management platforms, platforms-as-service, container orchestrators, you name it. It’s all full of bugs, hard to upgrade and a nightmare to operate. It’s all bad.”

- Boris Renski, Mirantis

### CompsciOverflow

#### Efficient algorithms for checking non-emptiness of the language of a Turing machine

I know that language non-emptiness is TM recognizable, and one can perform a BFS to find an input string that the TM accepts, if there is any. But what is the most efficient algorithm for that?

### TheoryOverflow

#### Structured set of binary words

Definitions:

Let $n\in \mathbb N$ be an integer, and consider the field $\mathbb K=GF(2^n)$.

For $c\in \mathbb N$, let $S_c$ be a set of $n$ elements from $\mathbb K$ such that:

1. Every element $e$ from $S_c$ is balanced: its weight $|e|=n/2$ (there are as many $1$s as $0$s).
2. Every pair of distinct elements $e,e'\in S_c, e\neq e'$ are at distance a multiple of $c$. That is: $$\forall (e,e')\in S_c^2, e\neq e', \exists k \in \mathbb N, |e\oplus e'|=k\cdot c$$

Observations:

• If the set $S_c$ could contain 0, 1, or 2 elements, its construction is trivial.
• For some values of $c$, there are no solutions.

Questions:

1. Does this set structure have a name?
2. Are there algorithms to construct $S_c$?
3. For fixed $(n,c)$, how many sets $S_c$ exist?
4. This question seems related to binary coding theory where the minimal distance is replaced by codewords evenly located in the space. Is there a way to express the problem into a code problem?

### QuantOverflow

#### Expected option return in MATLAB

The expected return of an option is given by its expected payoff under $P$ over its market price under $Q$.

For the Black-Scholes model, expected call option return is given as (see here):

$$E(R)=\frac{E^P[(S_T-K)^+]}{e^{-rT}E^Q[(S_T-K)^+]}=\frac{e^{\mu \tau}[S_tN(d_1^*)-e^{\mu \tau}KN(d_2^*)]}{C_t(r,T,\sigma,S,K)}-1$$

$$\text{with }d_1^*=\frac{\ln S_t/K+(\mu+\frac{1}{2}\sigma^2)\tau}{\sigma\sqrt{\tau}},\qquad d_2^*=d_1^*-\sqrt{\tau}\sigma$$

I implemented the $P$-payoff in MATLAB as

E(R) = exp((mu-d)*T)*blsprice(S, K, mu, T, sigma,d)

and get correct values (comparing with other studies).

However, I also tried to calculate out the expectation integral numerically in MATLAB as follows:

E(R2) = integral(@(S_T)max(S_T-K,0).*normpdf(log(S_T),mu-d-sigma^2/2,sigma),0,inf)

(with some arbitrary parameters) and I get a different value.

Can someone explain whether there is an error in my code for E(R2), or is MATLAB integration just not accurate enough?

E.g. try with

S=1,T=1,K=1,r=0.01,mu=0.1,sigma=0.05,d=0.02

#### How to automatically get all options data for a particular stock into microsoft excel?

I'm looking for a way to get the entire options chain (All options expiries) for a particular stock in excel without manually copy pasting anything. It does not have to be real time and I will only be searching for 1 stocks options at a time. A free method would be ideal.

### Lobsters

#### Best Practices for Designing a Pragmatic RESTful API

This was submitted in 2013 but lobste.rs is more lively now. Hopefully some new people will see this and ideally we can discuss some points.

## June 24, 2016

### QuantOverflow

I know the construction of Black-Scholes model and how do we solve it for an Implied Volatility. But in general, which option price do Softwares use to come up with Implied Vol for Overal Stock, let's say $AMZN?(When I look up for a symbol, it says AMZN has IV of 44%). I have an assumption that it doesn't have to do with option prices, rather it just an expected move in terms of 1 Standard Dev. Please tell me if I'm wrong ### Lobsters #### Penetration Testing Tools A list of tools and example commands. Comments ### Fefe #### Die Gegenden, die wirtschaftlich am meisten von der ... Die Gegenden, die wirtschaftlich am meisten von der EU abhängen, haben am stärksten für Brexit gestimmt. Das ist ja wie dieser alte Scherz aus dem Einzelhandel! Das Leben könnte so schön sein, wenn wir nur diese stressigen Kunden loswerden könnten! Auch sonst riecht das nach Klassenkampf. ### Halfbakery #### AILand (0.0) ### CompsciOverflow #### What are some approaches to solve these classes of problems efficiently Good day, Please consider the following problem: 3 friends named Alice, Bob and Cindy have 3 food items (cheese, tomato, bread) in the fridge. Each person has a particular numerical preference score (any positive number) for each food item. It has been agreed upon that each person should take one and exactly one food item. (cheese bread and tomato can not be broken up to pieces) we are interested in coming up with a matching that would maximising the total happiness. Here is a concrete example: Foods Vector: [Cheese, Tomato, Bread] preferance vector: (Preference score for Cheese, Tomato and Bread in order) Alice (4,3,3) Bob (2,4,5) Cindy (5,1,3) brute force: try all combos //Cheese belongs to Alice, Tomato belongs to Bob, Bread belongs to Cindy... 
total score for [Alice,Bob,Cindy]: 11 total score for [Alice,Cindy,Bob]: 8 total score for [Bob,Cindy,Alice]: 6 total score for [Bob,Alice,Cindy]: 8 total score for [Cindy,Bob,Alice]: 12 total score for [Cindy,Alice,Bob]: 13 //best solution It looks like we should give Cheese to Cindy, Tomato to Alice, and Bread to Bob for the greater good of the republic. Of course the naive brute force method is acceptable here because n=3, however if we have 100 people and 100 food items this method would be prohibitively expensive. it seems to me that n! will be the asymptotic complexity which is worse than exponential. I attempted to relate this problem to other well known problems like knapsack or Interval scheduling with no success. I would appreciate informed input on the type of this problem and if there are other approaches to find the optimal or good approximation solution in reasonable time. I have come up with a greedy algorithm that can do well in some cases, here it is: Cheese Tomato Bread Alice 4 3 3 Bob 2 4 5 Cindy 5 1 3 1- for each Person, find the Food among remaining foods that is preferred, assign that food to the person. 2- eliminate the chosen food from the list of available foods 3- repeat until you are left with the last person in my going step by step it would look like this: Step 1: Cheese Tomato Bread Alice *4 - - Bob - 4 5 Cindy - 1 3 Step 2: Cheese Tomato Bread Alice *4 - - Bob - - *5 Cindy - 1 - Step 3: Cheese Tomato Bread Alice *4 - - Bob - - *5 Cindy - *1 - This will take n steps as opposed to n!, and in this case gives us a reasonable answer (10) but in other situations the discrepancy between optimal solution and greedy solution is huge: preferance vector: (Preference score for Cheese, Tomato and Bread in order) Alice (3,2,1) Bob (9,1,2) Cindy (5,0,9) brute force: try all combos total score for [Alice,Bob,Cindy]: 13 total score for [Alice,Cindy,Bob]: 5 //greedy!!! 
total score for [Bob,Cindy,Alice]: 10 total score for [Bob,Alice,Cindy]: 20 //best total score for [Cindy,Bob,Alice]: 7 total score for [Cindy,Alice,Bob]: 9 ### QuantOverflow #### VEC GARCH (1,1) for 4 time series I have to estimate a VEC GARCH(1,1) model in R. I already tried rmgarch, fGarch, ccgarch, mgarch, tsDyn. Has somebody estimated a model like that? library(quantmod) library(fBasics) library(rmgarch) library(fGarch) library(parallel) library(ccgarch) library(mgarch) #from github vst/mgarch library(tsDyn) library(ggplot2) #load data, time series closing prices, 10 year sample #DAX 30 getSymbols('^GDAXI', src='yahoo', return.class='ts',from="2005-01-01", to="2015-01-31") GDAXI.DE=GDAXI[ , "GDAXI.Close"] #S&P 500 getSymbols('^GSPC', src='yahoo', return.class='ts',from="2005-01-01", to="2015-01-31") GSPC=GSPC[ , "GSPC.Close"] #Credit Suisse Commodity Return Strat I getSymbols('CRSOX', src='yahoo', return.class='ts',from="2005-01-01", to="2015-01-31") CRSOX=CRSOX[ , "CRSOX.Close"] #iShares MSCI Emerging Markets getSymbols('EEM', src='yahoo', return.class='ts',from="2005-01-01", to="2015-01-31") EEM=EEM[ , "EEM.Close"] #calculating log returns of the time series log_r1=diff(log(GDAXI.DE[39:2575])) log_r2=diff(log(GSPC)) log_r3=diff(log(CRSOX)) log_r4=diff(log(EEM)) #return matrix r_t=data.frame(log_r1, log_r2,log_r3, log_r4) #GARCH estimation #eGarch(1,1), not multivariate #Vec Garch(1,1) Est1=VECM(r_t,lag=1, estim="ML" ) print(Est1) I think the VECM operator isn't useful for my purpose since I need a martrix of 4x4 for alpha and one 4x4 for beta plus 4x1 vector for omega. Can somebody help with a package or code? ### StackOverflow #### How do I get the current time in Elm 0.17? I had asked this question already: How do I get the current time in Elm? 
And answered it by writing my own variant of start-app: http://package.elm-lang.org/packages/z5h/time-app/1.0.1 Of course the Elm architecture has since changed, and my old way of doing things no longer works, because there are no signals or Time.timestamp. So.... Suppose I build an app with the standard update function signature: update : Msg -> Model -> (Model, Cmd Msg) I'd like to timestamp my model with the time at update. One unacceptable almost-solution is to subscribe to Time.every. Conceptually this is not what I want. This is updating the model with time and also separately updating model with messages. What I want is to be able to write an update function with signature: updateWithTime : Msg -> Time -> Model -> (Model, Cmd Msg) I started trying to solve this by adding some extra messages: Msg = ... When | NewTime Time And creating a new command: timeCmd = perform (\x -> NewTime 0.0) NewTime Time.now So in any action, I can fire off an extra command to retrieve the time. But this gets messy and out of hand quickly. Any ideas on how I can clean this up? ### CompsciOverflow #### Share PDF files with permissions [on hold] in my office we share PDF Files company wide via email but they have to email us back to say who they're sending the PDF to. Is there a way to automate this process maybe using an online hosting solution or a shared drive? E.g., we have a shared drive that when users try to access a pdf file they're prompted with the question "Who will you be sending this to?" and then that response is then logged in a text file (or excel, word, etc doesnt really matter). Thank you!! ### StackOverflow #### Spark: Measuring performance of ALS I am using the ALS model from spark.ml to create a recommender system using implicit feedback for a certain collection of items. I have noticed that the output predictions of the model are much lower than 1 and they usually range in the interval of [0,0.1]. Thus, using MAE or MSE does not make any sense in this case. 
Therefore I use the areaUnderROC (AUC) to measure the performance. I do that by using the spark's BinaryClassificationEvaluator and I do get something close to 0.8. But, I cannot understand clearly how that is possible, since most of the values range in [0,0.1]. To my understanding after a certain point the evaluator will be considering all of the predictions to belong to class 0. Which essentially would mean that AUC would be equal to the percentage of negative samples? In general, how would you treat such low values if you need to test your model's performance compared to let's say Logistic Regression? I train the model as follows: rank = 25 alpha = 1.0 numIterations = 10 als = ALS(rank=rank, maxIter=numIterations, alpha=alpha, userCol="id", itemCol="itemid", ratingCol="response", implicitPrefs=True, nonnegative=True) als.setRegParam(0.01) model = als.fit(train) ### Lambda the Ultimate Forum #### how to design PL support for effects emerging from parallel non-determinism? Let's make an on-topic place to discuss programming language relevant issues in concurrency and non-determinism. Software development in general isn't really on topic. It really needs to be about an aspect of PL design or implementation, or else LtU isn't suitable. It seems folks designing a PL would like a language to be suitable for performance work, in a world where scaling better may imply doing more things in parallel, to get more hardware usage per unit of time in lieu of processors actually running faster. The plan is something like: well if cycles have stalled we can just use more cores. In general this seems to lead to unpredictability, which perhaps might be addressed by PL tech, somehow or other. Something is missing in how developers specify what they think code will do, in the large. It's a common belief it will be obvious. 
("It should be obvious what the code does from reading it, so comments are superfluous.") This is slightly like thinking other people will recognize a tune you have in mind when you drum out its rhythm with your fingers, when in fact they will only hear you drumming your fingers. A pattern you expect to be obvious won't be. Maybe more than one layer of grammar would help. (Just an idea.) In addition to local language syntax, you could specify larger patterns of what will happen, in terms of expected patterns in future inter-agent transactions, whether they be objects or something else. There seems lack of current art in capturing developer intent about shape of event sequences. Sometimes I suspect a few people intend to fix this with determinism in language behavior, which doesn't seem a good fit for fallible distributed internet components. I have nothing to post unless a remark catches my eye. If I have something interesting to say, I will. (I don't blog any more, and I'm all but unreachable except here, unless you call my phone or email my work address. My online presence is near zero. I don't even check LinkedIn.) What specifically would you do to a programming language to address the emergent elephant that results from code specifying tail, legs, trunk, etc? ### QuantOverflow #### DCC GARCH - Specificating of ARCH and GARCH parameter Matrices STATA The command in STATA to calculate the DCC model of two variables is: mgarch dcc ( x1 x2=, noconstant) , arch(1) garch(1) distribution(t) $$\begin{bmatrix} h_1{t} \\ h_2{t} \end{bmatrix} = \begin{bmatrix} w_{10} \\ w_{20} \end{bmatrix} + \begin{bmatrix} a_{11} & a_{12} \\ a_{21} & a_{22} \end{bmatrix} \begin{bmatrix} \epsilon_{1t-1} \\ \epsilon_{2t-1} \end{bmatrix} + \begin{bmatrix} g_{11} & g_{12} \\ g_{21} & g_{22} \end{bmatrix} \begin{bmatrix} h_{1t-1} \\ h_{2t-1} \end{bmatrix}$$ When i give this command STATA understands that the ARCH and Garch matrices are diagonal,i.e, (a21= a12= g21= g12 = 0). 
How can i change this to implement a FULL ARCH and GARCH parameter matrices, to capture the spillover effects? ### Lobsters #### Launching the 2016 Nim Community Survey #### brexit(3): perform normal programme termination and remove the binary from the disc https://twitter.com/mbalmer/status/746387347823071232 I suggest a new function for the Unix standard C library: brexit(3): perform normal program termination and remove the binary from the disk. https://twitter.com/mbalmer/status/746388255483039744 Just if it wasn’t clear: brexit(3) stands for “binary remove exit”. Should we vote on it? Comments ### StackOverflow #### How to train a neural network with Probabilistic input and output Here is my problem: 1. Network has to decide among 4 classes of outputs in each trial. A classifier.(choose A,B,C or D) 2. Inputs are 1 or -1 for 6 features (6 features for each class) 3. However, inputs are generated probabilistically. (60% 1 / 40% -1 for feature 1, 70%1, 30% -1 for feature 2 and so on...) 4. Training output is a distribution of 4 classes after 100 trials (60% A, 20%B, 10%C, 10%D - we do not know the result for single trial) How to build and train neural network to replicate the training output ? ### Overcoming Bias #### Caplan Audits Age of Em When I showed Bryan Caplan an early draft of my book, his main concern was that I didn’t focus enough on humans, as he doesn’t think robots can be conscious. In his first critical post, he focused mainly on language and emphasis issues. But he summarized “the reasoning simply isn’t very rigorous”, and he gave 3 substantive objections: The idea that the global economy will start doubling on a monthly basis is .. a claim with a near-zero prior probability. .. Why wouldn’t ems’ creators use the threat of physical hunger, exhaustion, pain, sickness, grime, hard labor, or sudden unexpected death’ to motivate the ems? .. torturing’ ems, .. why not?” .. 
Why wouldn’t ems largely be copies of the most “robot-like” humans – humble workaholics with minimal personal life, content to selflessly and uncomplainingly serve their employers? He asked me direct questions on my moral evaluation of ems, so I asked him to estimate my overall book accuracy relative to the standard of academic consensus theories, given my assumptions. Caplan said: The entire analysis hinges on which people get emulated, and there is absolutely no simple standard academic theory of that. If, as I’ve argued, we would copy the most robot-like people and treat them as slaves, at least 90% of Robin’s details are wrong. Since I didn’t think how docile are ems matters that much for most of my book, I challenged him to check five random pages. Today, he reports back: Limiting myself to his chapters on Economics, Organization, and Sociology, [half of the book’s six sections] .. After performing this exercise, I’m more inclined to say Robin’s only 80% wrong. .. My main complaint is that his premises about em motivation are implausible and crucial. Caplan picked 23 quotes from those pages. (I don’t know how picked; I count ~35 claims.) In one of these (#22) he disputes the proper use of the word “participate”, and in one (#12) he says he can’t judge. In two more, he seems to just misread the quotes. In #21, I say taxes can’t discourage work by retired humans, and he says but ems work. In #8 I say if most ems are in the few biggest cities, they must also be in the few biggest nations (by population). He says there isn’t time for nations to merge. If I set aside all these, that leaves 19 evaluations, out of which I count 7 (#1,4,9,13,17,19,20) where he says agree or okay, making me only 63% wrong in his eyes. Now lets go through the 12 disagreements, which fall into five clumps. 
In #6, Caplan disagrees with my claim that “well-designed computers can be secure from theft, assault, and disease.” On page 62, I had explained: Ems may use technologies such as provably secure operating system kernels (Klein et al. 2014), and capability-based secure computing systems, which limit the powers of subsystems (Miller et al. 2003). In #5, I had cited sources showing that in the past most innovation has come from many small innovations, instead of a few big ones. So I said we should expect that for ems too. Caplan says that should reverse because ems are more homogenous than humans. I have no idea what he is thinking here. In #3,7, he disagrees with my applying very standard urban econ to ems: It’s not clear what even counts as urban concentration in the relevant sense. .. Telecommuting hasn’t done much .. why think ems will lead to “much larger” em cities? .. Doesn’t being a virtual being vitiate most of the social reasons to live near others? .. But em virtual reality makes “telecommuting” a nearly perfect substitute for in-person meetings, at least at close distances. And one page before, I had explained that “fast ems .. can suffer noticeable communication delays with city scale separations.” In addition, many ems (perhaps 20%) do physical tasks, and all are housed in hardware needing physical support. In #2,23, Caplan disagrees with my estimating that the human fraction of income controlled slowly falls, because he says all ems must always remain absolute slaves; “humans hold 100% of wealth regardless .. ems own nothing.” Finally, half of his disagreements (#10,11,14,15,16,18) stem from his seeing ems them as quite literally “robot-like”. If not for this, he’d score me as only 31% wrong. According to Caplan, ems are not disturbed by “life events”, only by disappointing their masters. They only group, identify, and organize as commanded, not as they prefer or choose. 
They have no personality “in a human sense.” They never disagree with each other, and never need to make excuses for anything. Remember, Caplan and I agree that the key driving factor here is that a competitive em world seeks the most productive (per subjective minute) combinations of humans to scan, mental tweaks and training methods to apply, and work habits and organization to use. So our best data should be the most productive people in the world today, or that we’ve seen in history. Yet the most productive people I know are not remotely “robot-like”, at least in the sense he describes above. Can Caplan name any specific workers, or groups, he knows that fit the bill? In writing the book I searched for literatures on work productivity, and used many dozens of articles on specific productivity correlates. But I never came across anything remotely claiming “robot-like” workers (or tortured slaves) to be the most productive in modern jobs. Remember that the scoring standard I set was not personal intuition but the consensus of the academic literature. I’ve cited many sources, but Caplan has yet to cite any. From Caplan, I humbly request some supporting citations. But I think he and I will make only limited progress in this discussion until some other professional economists weigh in. What incantations will summon the better spirits of the Econ blogosphere? ### Lobsters #### The Big Idea is "Messaging" - Alan Kay on Misunderstanding in OOP [1998] #### Dolphin 5.0 Release ### QuantOverflow #### weighted probability densities in matlab I have a set of variables (lets say a nx3 : 3 variables and n rows). I set the mean to be my current data (1x3); data for 3 variables as of today and set my covariance matrix as the identity matrix. I then calculate the probability density using the mnvpd function in matlab. 
In essence these probability densities are my "distances" from my current data (my mean variable) My question is if I want to compute a weighted probability density how do I do that? if i want to weight one variable 3x the others. Based on my most recent value(my Mean parameter) the last data point has the highest weight(closest distance). My question is how do I assign the 4th variable to have for example 3x more weight so that its reflected in my calculation of densities. X is my variable matrix, MU = [0.6638 -0.43 -1.56 0.45] X = 0.7926 -1.1549 -0.9966 0.0520 0.7399 -0.8464 -1.4008 0.1385 0.7428 -0.5986 -1.3788 0.1682 0.3965 -0.4491 -1.2558 0.2441 0.6638 -0.4265 -1.5430 0.4194 Y = mvnpdf(X, MU, eye(4)) Y = 0.0152 0.0218 0.0235 0.0228 0.0253 Y/sum(Y) = 0.1401 0.2004 0.2165 0.2101 0.2329 ### StackOverflow #### Neural network: activation function vs transfer function It seems there is a bit of confusion between activation and transfer function. From Wikipedia ANN: It seems that the transfer function calculates the net while the activation function the output of the neuron. But on Matlab documentation of an activation function I quote: satlin(N, FP) is a neural transfer function. Transfer functions calculate a layer's output from its net input. So who is right? And can you use the term activation function or transfer function interchangeably? ### Lobsters #### Why Cold War Warsaw Pact Tactics Work In Wargaming ### StackOverflow #### custom loss for mean absolute percentage error (MAPE) in xgboost For now I do some task about regression, and in this case I use MAPE metric. I really need to try to use xgbost with custom objective (because other custom in my case has bad scores) , but i don't understand how to implement same way as is in the guide Again, my question not about computing gradient for this metric, but how to implement it for xboost custom loss. 
E.g If I compute some-how partial derivatives i get two denetional vector vector for grad and hess, but according to guide I must return n sampes size array. Am i right? Thank you for any help! #### Learning OR gate through gradient descent I am trying to make my program learn OR logic gate using neural network and gradient descent algorithm. I took additional input neuron as -1 so that I can adjust threshold of neuron for activation later. currently threshold is simply 0. Here's my attempt at implementation #!/usr/bin/env python from numpy import * def pcntrain(inp, tar, wei, eta): for data in range(nData): activation = dot(inp,wei) wei += eta*(dot(transpose(inp), target-activation)) print "ITERATION " + str(data) print wei print "TESTING LEARNED ALGO" # Sample input activation = dot(array([[0,0,-1],[1,0,-1],[1,1,-1],[0,0,-1]]),wei) print activation nIn = 2 nOut = 1 nData = 4 inputs = array([[0,0],[0,1],[1,0],[1,1]]) target = array([[0],[1],[1],[1]]) inputs = concatenate((inputs,-ones((nData,1))),axis=1) #add bias input = -1 weights = random.rand(nIn +1,nOut)*0.1-0.05 #random weight if __name__ == '__main__': pcntrain(inputs, target, weights, 0.25) This code seem to produce output which does not seem like an OR gate. Help? ### QuantOverflow #### quantlib python : missing methods? I'm reading Introduction to Selected Classes of the QuantLib Library I by Dimitri Reiswich and tries to "convert" it to Python. It seems to me that some C++ possibilities aren't available in python. I'm not familiar with SWIG but I guess it's a matter of declaring them in the appropriate *.i files. For instance both these work following the pdf text: January: either QuantLib::January or QuantLib::Jan print(ql.Date(12, 12, 2015)) print(ql.Date(12, ql.January, 2015)) But why Jan doesn't work ? print(ql.Date(12, ql.Jan, 2015)) In the Calendar description the 2 followinng commented lines return an error, browsing through the code I failed at finding them. 
Would someone be kind enought to point me directions on how to make them available ? import QuantLib as ql def calendarTesting(): frankfurtCal = ql.Germany(ql.Germany.FrankfurtStockExchange) saudiArabCal = ql.SaudiArabia() myEve = ql.Date(31, 12, 2009) print('is BD: {}'.format(frankfurtCal.isBusinessDay(myEve))) print('is Holiday: {}'.format(frankfurtCal.isHoliday(myEve))) # print('is weekend: {}'.format(saudiArabCal.isWeekend(ql.Saturday))) print('is last BD: {}'.format(frankfurtCal.isEndOfMonth(ql.Date(30, 12, 2009)))) # print('last BD: {}'.format(frankfurtCal.endOfMonth(myEve))) calendarTesting() ### TheoryOverflow #### Super Mario Galaxy problem Suppose Mario is walking on the surface of a planet. If he starts walking from a known location, in a fixed direction, for a predetermined distance, how quickly can we determine where he will stop? More formally, suppose we are given a convex polytope$P$in 3-space, a starting point$s$on the surface of$P$, a direction vector$v$(in the plane of some facet containing$p$), and a distance$\ell$. How quickly can we determine which facet of$P$Mario will stop inside? (As a technical point, assume that if Mario walks into a vertex of$P$, he immediately explodes; fortunately, this almost never happens.) Or if you prefer: suppose we are given the polytope$P$, the source point$s$, and the direction vector$v$in advance. After preprocessing, how quickly can we answer the question for a given distance$\ell$? It's easy to simply trace Mario's footsteps, especially if$P$has only triangular facets. Whenever Mario enters a facet through one of its edges, we can determine in$O(1)$time which of the other two edges he must leave through. Although the running time of this algorithm is only linear in the number of edge-crossings, it's unbounded as a function of the input size, because the distance$\ell$could be arbitrarily larger than the diameter of$P$. Can we do better? 
(In practice, the path length isn't actually unbounded; there is a global upper bound in terms of the number of bits needed to represent the input. But insisting on integer inputs raises some rather nasty numerical issues — How do we compute exactly where to stop? — so let's stick to real inputs and exact real arithmetic.) Is anything nontrivial known about the complexity of this problem? Update: In light of julkiewicz's comment, it seems clear that a real-RAM running time bounded purely in terms of$n$(the complexity of the polytope) is impossible. Consider the special case of a two-sided unit square$[0,1]^2$, with Mario starting at$(0,1/2)$and walking in direction$(1,0)$. Mario will stop on the front or the back of the square depending on the parity of the integer$\lfloor \ell \rfloor$. We can't compute the floor function in constant time on the real RAM, unless we're happy equating PSPACE and P. But we can compute$\lfloor \ell \rfloor$in$O(\log \ell)$time by exponential search, which is an exponential improvement over the naive algorithm. Is time polynomial in$n$and$\log \ell$always achievable? ### StackOverflow #### Filtering and displaying values in GraphLab Sframe? So, I started working with Graphlab for my Machine learning class a week ago. I am still very new to Graphlab and i read through the API but couldn't quite get the solution I was looking for. So, here is the question. I have this data with multiple columns e.g- bedrooms,bathrooms,square ft,zipcode etc. These are basically the features and my goal is to work with various ML algorithms to predict the price of a house. Now, I am supposed to find the average price of the houses with zipcode - 93038. So, i broke down the problem into smaller bits as i am quite naive and decided to use my instincts. This is what i tried so far.Firstly, I tried to find a way to create a filter such that i can extract only the prices of the house with zipcode - 93038. 
import graphlab sf = graphlab.SFrame('home_data.gl') sf[(sf['zipcode']=='93038')] These showed me all the columns with zipcode 93038 but then i only want to display the price and zipcode column with value 93038. I tried so many different ways but just couldn't figure things out. Also, lets say i want to find the mean of the prices with zipcode value 93038.How do i do that? Thanks in advance. ### Halfbakery #### Chameleon cables (0.5) ### TheoryOverflow #### Numerical precision in sum-of-squares method? I have been reading a bit about the sum-of-squares method (SOS) from the survey of Barak & Steurer and the lecture notes of Barak. In both cases they sweep issues of numerical accuracy under the rug. From my (admittedly limited) understanding of the method, the following should be true: Given any system of polynomial equalities$E$over real-valued variables$x \in \mathbb{R}^n$, where all parameters are$O(1)$($n$,$|E|$, and degree of each constraint), the degree-"$2n$" ($=O(1)$) SOS method finds a satisfying assignment of the variables or proves none exists in$O(1)$time. My first question is whether the above claim is true (is there a naive argument that doesn't use SOS to solve this?). The second question is where numerical accuracy fits in. If I want to get an assignment that satisfies all constraints to within additive$\varepsilon$accuracy, how does the runtime depend on$1/\varepsilon$? In particular, is it polynomial? The motivation for this is to, say, apply a divide-and-conquer approach on a large system until the base case is an$O(1)$-size system. EDIT: From Barak-Steurer, it appears that the "degree$l$sum-of-squares algorithm" on p.9 (and the paragraphs leading up to it) all define problems for solutions over$\mathbb{R}$, and in fact the definition of a pseudo-distribution in section 2.2 is over$\mathbb{R}$. Now I am seeing from Lemma 2.2, however, that one is not guaranteed a solution/refutation at degree$2n$without binary variables. 
So I can refine my question a little bit. If your variables are not binary, the worry is that the sequence of outputs$\varphi^{(l)}$is not finite (maybe not even monotonically increasing?). So the question is: is$\varphi^{(l)}$still increasing? And if so, how far do you have to go to get additive accuracy$\varepsilon$? Though this likely does not change anything, I happen to know my system is satisfiable (there is no refutation of any degree), so I am really just concerned about how large$l$needs to be. Finally, I am interested in a theoretical solution, not a numerical solver. #### Kth best problem that is NP-hard for K polynomial A Kth best problem is, given some constraint, to find a solution that has the Kth best value compared to all solutions that meet the constraint. One way to write this as a decision problem is to decide whether there are K solutions that achieve a given value. Garey and Johnson list several NP-hard Kth best problems: Kth Best Spanning Tree, Kth Shortest Path, Kth Largest Subset, and Kth Largest m-Tuple. However, each of these problems can be solved in polynomial time if K is restricted to be polynomial in the size of the rest of the input. Is there a Kth best problem that is NP-hard even if K is restricted to be polynomial in the size of the rest of the input (but poly-time solvable for K=1)? The problem should be natural enough that other Kth best problems can reduce to it, or at least be a good example for how to show NP-hardness of a Kth best problem. ### CompsciOverflow #### Find a grammar for the language$a^n b^n c^{n+1}$I need to find a grammar for the language${ L = \{a^n b^n c^{n+1}, n\ge 0\} }$I came up with this solution but it's not right: $S \rightarrow aAbCcc \mid c$, $A \rightarrow aAb \mid \epsilon$, $C \rightarrow cc \mid \epsilon$. So what's the right grammar?
### QuantOverflow #### Derivative: Delta of a Down and Out Call Option with Barrier=Debt(K) I am trying to compute the derivative of this function with respect to V0: This is the price of a down and out call option, assuming the barrier equal to the level of debt K. In other terms, I need to compute the Delta of this DOC Option, in the case of Barrier=K (neither Barrier higher than K nor Barrier lower than K) and I cannot find this case anywhere in the literature. Furthermore, the derivative of the first two terms of the equation equals N(d1), the delta of a plain vanilla call option. Therefore, I just need the derivative of what is in the parenthesis [...] with respect to V_0. Can someone of you help me? Anything will be really appreciated! ### CompsciOverflow #### Computer Ethics and Software Implementations: protecting the right to spread the word anonymously *and permanently* It would be interesting how mathematical models of marketing strategies and simulations of their effects on society could be constructed. We all know of how rumors spread in older societies according to a logistic model. But in an era governed by people linked with social graphs on social networks and forums, such models are different. I wonder where I can find more information on these models. I can also see a future (let's hope I will be wrong), where there will be folks in place to block certain types of information. Thus, added on to our search engines, there will be algorithms to catalogue content in a very qualitative manner, using linguistical analysis of content, and block it. I wonder just how far we are at being able to understand just how good these filtering algorithms can be. In a sense, these are somewhat of an attack on democracy: democracy holds everyone should have the right to express themselves, but what about the right to be reached and be heated by others. 
It seems capitalism is the key determining factor, with money paid to propagate information as broadly as possible and to target audiences. But what if a money making scheme, no matter how profitable, is evil? What tools do we, as computer scientists and people knowledgeable in related fields, have at our disposal to combat this: • To combat explosive propaganda on the internet. • To combat the shutting down of voices that want to speak out but cannot be heard. • To allow such voices to remain anonymous so that they cannot be caught by evil parties and squandered for expressing themselves and their vote to others. • To allow the presence of such words to be persistent and not be deleted. Thank you for your answers. ### Fefe #### Komisch, plötzlich ist das mit dem Brexit doch gar ... Komisch, plötzlich ist das mit dem Brexit doch gar nicht mehr so eilig. Wollte ja auch niemand, die wollten nur Verhandlungsmasse, um sich von der EU für die Ewigkeit die Sonderstellung der Briten festschreiben zu lassen, die seit Gründung weniger einzahlen müssen als andere. #### Hey, so ein Brexit eignet sich fast so gut wie eine ... Hey, so ein Brexit eignet sich fast so gut wie eine Fußball-EM zum Burying. Und beides zusammen, gleichzeitig? Jackpot! #### Laut BBC-Brexit-Liveticker will Morgan Stanley 2000 ... Laut BBC-Brexit-Liveticker will Morgan Stanley 2000 Arbeitsplätze aus London nach Frankfurt und Dublin verlegen. Darauf angesprochen pullt Morgan Stanley einen "wer, iiiiiich?!?" und dementiert wunderbar überspezifisch: The American investment bank said it has no official plans #### Anlässlich TTIP gab es ja mal den Hinweis, dass in ... Anlässlich TTIP gab es ja mal den Hinweis, dass in der EU und in den USA andere Grundsätze gelten. In der EU muss ein Produkt nachweislich unschädlich sein, damit es verkauft werden darf. In den USA kann der geprellte Konsument ja später klagen. Kaum ein Produkt illustriert diesen Grundsatz so schön wie dieses hier.
### Lobsters #### 15 minutes with Docker Swarm Mode ### TheoryOverflow #### On the size of residue classes Let$n \in \mathbb{N}$be a odd number. Let$S \subseteq \{1,3,5,7,...,n-2,n\}$and$|S|$is even number. Let$R_i^k=\{a \mid a \in S \text{ } \&\text{ } a\equiv i \text{ }(mod \text{ } k)\}$where$0\leq i\leq k-1$.$\textbf{Question:}$What is the upper bound$N$on$k$such that$\forall S$such that$|S|>0$is even$\exists k \leq N \quad \exists i$such that$|R_i^k|$is odd number. One can see that$N \leq n+1$. What is best upper bound one can get? Is it possible to get$O(\sqrt{n})$or$O(n^{1/3})$or better. ### StackOverflow #### Bayesian Sets and Finding the Top Sets Bayesian Sets proposed by Ghahramani and Heller (http://mlg.eng.cam.ac.uk/zoubin/papers/bsets-nips05.pdf) provides a useful way to find sets of items based upon a query (eg dog would return cat, pet) and produces comparable output to google sets. I was wondering how the method could be used to find the top sets without first providing a word (item) as a query. Is anybody aware of active learning on Bayesian sets where the sets with the highest density are identified? #### How is 'match' implemented in Haskell's FGL to be O(1)? In Haskell's Functional Graph Library (FGL), most of the graph algorithms depend on the 'match' function, which, given a Node n and a Graph g, returns c & g', where c is the Context of the n, and g' is the rest of the graph (which contains no references to n). The only way I can see of doing this is be examining each of the contexts in g and removing any edges which refer to n and adding them to the context c. This would take linear time, I believe. Martin Erwig, who wrote the library, suggests in this paper that this transformation can be done in constant or at least sub-linear time. Can anyone explain to me how this is accomplished? ### QuantOverflow #### x13 Arima analysis with negative values I'm running x13 Arima analysis on a US GDP series to get the "trend" component. 
2008-12-31 -8.2 2009-03-31 -5.4 2009-06-30 -0.5 2009-09-30 1.3 2009-12-31 3.9 2010-03-31 1.7 2010-06-30 3.9 2010-09-30 2.7 2010-12-31 2.5 2011-03-31 -1.5 2011-06-30 2.9 2011-09-30 0.8 As you can see the data has negative values and when trying to run the model on the data it complains: X13Error: WARNING: Automatic transformation selection cannot be done on a series with zero or negative values. What is the right way to run X13 when you have negative values in your series? (This question is related to http://stackoverflow.com/questions/38000059/statsmodels-x13error-warning-automatic-transformation-selection-cannot-be-do where I asked about the program settings) ### StackOverflow #### Should i start teaching myself Machine Learning as an Android developer? [on hold] I've seen lots of hype going around machine learning and sounds like one of the essential things a developer must have in the near feature as Google starts to put more effort in it. So do you think it's right to start learning, If yes, where and how should I start? thanks in advance! ### High Scalability #### Stuff The Internet Says On Scalability For June 24th, 2016 Hey, it's HighScalability time: A complete and accurate demonstration of the internals of a software system. If you like this sort of Stuff then please support me on Patreon. • 79: podcasts for developers; 100 million: daily voice calls made on WhatsApp; 2,000; cars Tesla builds each week; 2078 lbs: weight it takes to burst an exercise ball; 500 million: Instagram users; > 100M: hours watched per day on Netflix; 400 PPM: Antarctica’s CO2 Level; 2.5 PB: New Relic SSD storage; • Quotable Quotes: • Alan Kay: The Internet was done so well that most people think of it as a natural resource like the Pacific Ocean, rather than something that was man-made. When was the last time a technology with a scale like that was so error-free? The Web, in comparison, is a joke. The Web was done by amateurs. 
• @jaykreps: Actually, yes: distributed systems are hard, but getting 100+ engineers to work productively on one app is harder. • @adrianco: All in 2016: Serverless Architecture: AWS Lambda, Codeless Architecture: Mendix, Architectureless Architecture: SaaS • @AhmetAlpBalkan: "That's MS SQL Server running on Ubuntu on Docker Swarm on Docker Datacenter on @Azure" @markrussinovich #dockercon • Erik Darling: EVERYTHING’S EASY WHEN IT’S WORKING • @blueben: Bold claim by @brianl: Most of the best tech of the last 10 years has come out of work at Google. #VelocityConf • Joe: there is no such thing as a silver bullet … no magic pixie dust, or magical card, or superfantastic software you can add to a system to make it incredibly faster. Faster, better performing systems require better architecture (physical, algorithmic, etc.). You really cannot hope to throw a metric-ton of machines at a problem and hope that scaling is simple and linear. Because it really never works like that. • Eran Hammer: The web is the present and it’s a f*cking mess. Deal with it. • @etherealmind: If you believe in DevOps/NetOps you have to believe that leaving Europe is a difficult but better course of action. Small, fast & fluid • Sanfilippo: Redis is currently not good for data problems where write safety is very important. One of the main design sacrifices Redis makes in order to provide many good things is data retention. It has best effort consistency and it has a configurable level of write safety, but it’s optimized for use cases where most of the time, you have your data, but in cases of large incidents you can lose a little bit of it. • David Smith: The best time you are ever going to have to make a new app is when there's a new iOS update. Go through the diffs. Go through the What's New? Find something that couldn't be possible before and make an app around that. 
• @DanielEricLee: There was a timezone bug so I wrote a test and then the test failed because the CI runs in a different timezone and then I became a farmer • @jasongorman: Reminds me of someone I know who led a dev team who built something that won an award. None of team invited to awards bash. Only snr mgmt. • David Robinson: My advice to graduate students: create public artifacts • @cdixon: Because distributed clusters of commodity machines are more powerful. • @wmf: 128-port TORs have outrun the compute capacity of most racks, so putting two mini-TORs in 1U is a great idea. • msravi: I moved from AWS EC2 to Google Cloud a few days ago. Google really seems to have beaten AWS, at least in pricing and flexibility. On AWS (Singapore region) a 2-vCPU, 7.5G RAM instance costs$143/month (not including IOPS and bandwidth costs), while a similar one on GC works out to about $56/month. That's a massive difference. In addition, GC allows me to customize cores and RAM flexibly to a point, which is important for me. • Mobile Breakfast: What is clear that we will get rid of the classic circuit-switched technology and move to all IP networks fairly soon in the US. • Douglas Rushkoff: I think they all come down to how you can optimize your business or the economy for flow over growth; for the circulation of money rather than the extraction of money. • Alan Hastings~ [Species] that go into synchrony may be more subject to extinction because a single driver can trigger a collapse • @TechCrunch: More cars than phones were connected to cell service in Q1 http://tcrn.ch/28MLtmt by @kristenhg • @docker: "Nobody cares about #containers, it's the application that matters!" - @solomonstre, CTO @Docker #DockerCon • @cmeik: The most commercially successful NoSQL database: Lotus Notes. • Brittany Fleit: behavior-based push results in open rates 800 percent higher than immediate blasts. Personalizing a message based on individual actions garners much more engagement. 
• Dilbert, of course, nails it on AI. • How will hackers be stopped from using Return-oriented programming (ROP) to execute privilege escalation attacks? ROP was created when "clever hackers realized that they could successively invoke snippets of code at the end of existing subroutines as 'proxies' to accomplish the work they needed done." Randomizing memory locations didn't stop them so Intel created new hardware magic called Control-flow Enforcement Technology. Intel added a new "ENDBRANCH" instruction and created a new "shadow stack". It turns out the immense power and beauty of the stack in von neumann architectures is also a great weakness. The story of life. Steve Gibson with an inspiring deep dive on CET in Security Now 565 Don't miss all that the Internet has to say on Scalability, click below and become eventually consistent with all scalability knowledge (which means this post has many more items to read so please keep on reading)... ### QuantOverflow #### Valuation Method for CASH in S.06.02 QRTs Extract from the latest spec for C0150 (Valuation Method) of S.06.02: Identify the valuation method used when valuing assets. One of the options in the following closed list shall be used: 1 — quoted market price in active markets for the same assets 2 — quoted market price in active markets for similar assets 3 — alternative valuation methods 4 — adjusted equity methods (applicable for the valuation of participations) 5 — IFRS equity methods (applicable for the valuation of participations) 6 — Market valuation according to Article 9(4) of Delegated Regulation (EU) 2015/35 My question is - which value applies to cash holdings / CIC category 7? ### StackOverflow #### Model Selection for Multilinear Regression on Large Datasets in Java I have a very large dataset with more than 1 Million rows and 140 features (>30GB) and have already been able to run a linear regression on it in an optimal method. 
Since all of these features are not good predictors, I need to either shrink or get subsets of these features to further refine my model. I have already tried the following methods: 1. BIC/AIC - It requires the calculation of the RSS(Residual Sum Squared Error), which in turn requires parsing of all rows. 2. Ridge Regression - estimation of tuning parameter requires calculation of RSS again. 3. Lasso - It doesn't have a closed-form solution. But for obvious reasons, I can't parse my data again to calculate the sum-squared error and stuff. I currently have my XT* X Matrix and also my XT* Y matrix, where X represents my data. I cannot store the X matrix in memory at any point of time due to memory issues. Link for Reference ### QuantOverflow #### S0602 - whether to report Quantity (C0130) or Par Amount (C0140) for Money Market Funds? We have several positions in a money market fund (CIC IE43) to report in the S.06.02 QRT, and we receive source data for both Quantity & Par Amount. These are actually identical. Which one should be used, given that - per definitions below - we should use one or the other? Is Par amount 'relevant' in this case? C0130 Quantity Number of assets, for relevant assets. This item shall not be reported if item Par amount (C0140) is reported. C0140 Par amount Amount outstanding measured at par amount, for all assets where this item is relevant, and at nominal amount for CIC = 72, 73, 74, 75 and 79 if applicable. This item shall not be reported if item Quantity (C0130) is reported. ### CompsciOverflow #### How to prove intersection between languages L1 (belongs to NP) and L2 (belongs to P) actually belongs to NP? I have to prove that if L <=p L1 intersection L2, where L1 and L2 are described as above, L belongs to NP. I thought about the definitions of P and NP and built a DTM D that decides L2 and a NTM N that decides L1. Then I tried to build a NTM NN that accepts when both D and N accept on some input w. 
Now, did I really prove that intersection between L1 and L2 is in NP by constructing NTM NN? Thx in advance #### Is it okay to select the best performers of test cases for scientific publication in neural network machine learning If I split my data properly into 75% train, 15% test, and 15% validation, and there are over 100,000 samples, is it appropriate for me to train 100s of neural networks then select only a couple based on their testing set grades for publication in a research article? The question is for neural network training in which the random initialization of weights can drastically affect test grades. Network parameters like the number of neurons, number of layers, etc., can all change what the testing grade is. These are all things that are easy to modify, rather than data preparation which can take a long time. Do I need to resplit my data every time I train a new network? What strategy do most NN theorists use? #### L ={a^n.b^n | n>=0} , what is the difference between L^2 and L.L? Which option is correct? Please explain why the others are wrong if any one is correct ### StackOverflow #### What is the exact meaning of the equal sign in Elixir? I don't get what exactly the equal sign means in Elixir. What is unclear is that it looks like a mix between assignment and a pattern matching operation. iex(1)> x=4 4 iex(2)> y=5 5 iex(3)> 3=y ** (MatchError) no match of right hand side value: 5 iex(3)> y=3 3 iex(4)> y=x 4 I understand that in Elixir, the equals operator means to match the left side of the = sign to the right side. First two lines make sense to me. x and y are unbound variables, hence they could match anything. They are bound as they match. Consequently, I understand the third line. You can't match 3 with 5. Where I start to lose my head is why the hell the last two lines are executed without giving the same error. It looks like the equal sign is back to being an assignment operator only.
I've tried to accept this behaviour as a fact without full understanding and tried to go further in the learning of the language. But as pattern matching is one of the core mechanisms of Elixir, I'm constantly stuck and feel I should go back to this original question. I will not go any further before I fully understand what exactly happens with the "=" sign and what the logic is. #### How to Interpret roc_curve(Test,Predictions) in scikit-learn I am working with scikit-learn for a classification problem to predict Win or Loss of an opportunity. I used the piece of code: fpr, tpr, thresholds =roc_curve(yTest,predictions) And the result is: (array([ 0. , 0.2628946, 1. ]), array([ 0. , 0.73692477, 1. ]), array([2, 1, 0])) I am aware of calculating the AUC using the fpr, tpr for various thresholds varying in the range (1,0). Ideally, what I know is that the threshold should be in between 1 and 0. But, here the threshold values are 2,1,0. What should I understand from this, and how should I interpret it? The sample code looks fine: import numpy as np from sklearn import metrics y = np.array([1, 1, 2, 2]) scores = np.array([0.1, 0.4, 0.35, 0.8]) fpr, tpr, thresholds = metrics.roc_curve(y, scores, pos_label=2) >>>fpr array([ 0. , 0.5, 0.5, 1. ]) >>>tpr array([ 0.5, 0.5, 1. , 1. ]) >>> thresholds array([ 0.8 , 0.4 , 0.35, 0.1 ]) My predict_proba(yTest) are: [ 0.09573287 0.90426713] [ 0.14987409 0.85012591] [ 0.16348188 0.83651812] ..., [ 0.13957409 0.86042591] [ 0.04478675 0.95521325] [ 0.03492729 0.96507271] ### CompsciOverflow #### CFG Equivalent of regular expressions So I was wondering something about the Chomsky hierarchy. DFAs (and NFAs) accept regular languages, while NPDAs accept context-free languages. Right-regular or left-regular grammars produce regular languages, while context-free grammars produce context-free languages. Regular expressions represent regular languages - but is there a context-free equivalent? i.e.
is there some additional term we could allow to appear in a regular expression such that regular expressions with that functionality now represent context-free languages? ### StackOverflow #### Manually plotting straight boundary line in Perceptron [The material belongs to the Coursera Machine Learning course by Andrew Ng] I got one of the exercises to work in R (I could have opted for Python - not essential to the question), using different methodology, and got the following plot with the boundary decision line on it: The red points were admitted to a college, while the rest were not. The question is not a how-to get the line on the plot, but rather why does the following line in the code adapted to R from the course materials works: y = c((-1/coefs[3]) * (coefs[2] * x + coefs[1])) So it is in reality asking about the math underpinning this command. The coefficients correspond to the logistic regression coefficients. Here is the dataset, and here is the entire code: dat = read.csv("perceptron.txt", header=F) is.data.frame(dat) colnames(dat) = c("test1","test2","y") head(dat) plot(test2 ~ test1, col = as.factor(y), pch = 20, data=dat) fit = glm(y ~ test1 + test2, family = "binomial", data = dat) coefs = coef(fit) (x = c(min(dat[,1])-2, max(dat[,1])+2)) (y = c((-1/coefs[3]) * (coefs[2] * x + coefs[1]))) lines(x, y) ### Lobsters #### FBI’s use of Tor exploit is like peering through “broken blinds” ### CompsciOverflow #### Average prefix code length of every 4-sized frequency vector is bounded at 2 I'm trying to show that for every frequency vector$(p_1, p_2, p_3, p_4)$such that$\sum_{i=1}^4 p_i=1$, the average word length outputted by Huffman algorithm is bounded at 2: If$(w_1,w_2,w_3,w_4)$is the outputted code, then$\sum_{i=1}^4 p_i |w_i| \le 2$. I've tried looking at the tree that is generated by Huffman algorithm, but the thing is that several different tree structures match different 4-sized frequency vectors and I can't tell something general about all of them. 
Also, is there a more general theorem for$k, n$(here$k=4, n=2$)? ### StackOverflow #### Predicting price using previous prices with R and Neural Networks (neuralnet) Within the R Neural Network page, I am using the neural network function to attempt to predict stock price. Training data contains columns High,Low,Open,Close. myformula <- close ~ High+Low+Open neuralnet(myformula,data=train_,hidden=c(5,3),linear.output=T) My question: given the below data example, could you tell me what the formula would look like? I have a table with columns "High","Low","Open","Close"; it has two rows of values, each row represents a candle stick for the day. So the two rows in the data are candle sticks for the previous two days. My aim is to predict what the next candle stick is, i.e. "Open","High","Low","Close" given the previous two candlesticks. My neural network will be presented with the previous data one candle stick at a time. I want to know what the next candlestick is, so what would my R formula look like? Thanks Let me know ### DragonFly BSD Digest #### Reminder: sometimes VESA is better This is limited to some users of specific Intel video chipsets, but: if you get odd screen artifacts in X, the ‘vesa’ driver may work just fine for you. Or turn acceleration off. Or set ‘drm.i915.enable_execlists=0’ according to zrj on #dragonflybsd. (Updated to reflect all the answers in the thread and elsewhere.) ### CompsciOverflow #### What is the regular expression of this language? [duplicate] This question already has an answer here: $\Sigma = \{0, 1\}$, $L = \{x \mid x \in \Sigma^* \ \& \ \#_0(x) = 3 \text{ or } \#_1(x) = 3 \}$. What is the regular expression of this language? At first I thought $r = (0+1)^*0(0+1)^*0(0+1)^*0$ or $r = (0+1)^*1(0+1)^*1(0+1)^*1$. And what is its corresponding NFA/DFA? I've been puzzled by this language and if it's regular for a while now.
### StackOverflow #### Java 8 Issue trying to add Function FunctionalInterface invocation when using Inheritance and Generics I've been learning Java 8 features, and so far have had success implementing them, but my latest code throws up two problems. The first problem can be solved by casts, but this shouldn't happen here as the type inheritance should be sound; This is when accepting a one argument constructor parameter, where the constructor is a Function FunctionalInterface Method Reference. Related is an issue where Java cannot resolve a concrete declared subtype for a generic parameter as valid, when populating a map with a Method Reference, if this type is used as the type of the constructor argument at invocation of the Method Reference. The code that follows uses a Separated Interface inheritance tree, the interfaces specification, and the interfaces implementation, for two types of object Node, and Edge. Edges have generic parameters to define the start Node and end Node specified when creating the method reference. Both Nodes and Edges can have custom subtypes, which have a derived interface (subtype) implemented by the implementation subtype. Nodes contain relationships as Sets which can be Concrete Node references, or Concrete edge references. (the startNode should match the parent object holding the set of Edges, but is required as a member of the edge, and this is beyond my control) To avoid massive duplicated code, access is via an Operations interface The Operations interface also has a separated interface and concrete implementation, and has two types (Node and Edge) to support the generics needed for Edges, one is created for each relationship set in each ImplExtendedNode which has relationships. 
This modification is to move the storage of the creator method reference back into the class which knows which concrete types to use, and has permissions to access the Sets, rather than passing the custom Method Reference into each Operations constructor, and then passing it back for invocation. The problem is very specific to the Edge (Constructor) Method Reference, and the inability correctly resolve that the types are valid and meet the requirements of the generic type constraints. The system is part of a batch process. I've taken out everything unimportant, and I really appreciate helping me learn more about generics and java 8's newish features for anybody who understands what I have got wrong. //Node Interfaces public interface Node { ... } public interface DefaultNode extends Node { ... public <S extends Object & DefaultNode> Optional<S> createRelationship(NodeOperations nodeOps); public <R extends Object & DefaultEdge, T extends Object & DefaultNode> Optional<R> createRelationship(EdgeOperations edgeOps); ... } //Edge Interfaces public interface Edge<S extends Object & Node, T extends Object & Node> { ... } public interface DefaultEdge<S extends Object & Node, T extends Object & Node> extends Edge<S, T> { ... } //Implementation Classes //(base objects) public abstract class ImplNode implements Node { ... } public abstract class ImplEdge<S extends Object & Node, T extends Object & Node> implements Edge<S, T> { ... //constructor(s) public ImplEdge(S s) { ... } ... } //(provide basic functions) public abstract class ImplDefaultNode extends ImplNode implements DefaultNode { //holds method reference to custom node/edge constructors in child implementation class definitions (multiple per class) protected Map<NodeOperations, Supplier> nodeSuppliers; protected Map<EdgeOperations, Function> edgeSuppliers; //this works fine @Override public <S extends Object & DefaultNode> Optional<S> createRelationship(NodeOperations nodeOps) { ... 
Supplier<S> f = this.nodeSuppliers.get(nodeOps); S relationship = f.get(); ... } //issue one, cannot automatically cast type of "this" to expected type of T in f.apply(this) - I know this should be possible in most cases and usually points to an issue with the code @Override public <R extends Object & DefaultEdge, T extends Object & DefaultNode> Optional<R> createRelationship(EdgeOperations edgeOps) { ... Function<T, R> f = this.edgeSuppliers.get(edgeOps); R relationship = f.apply(this); ... } } public abstract class ImplDefaultEdge<S extends Object & Node, T extends Object & Node> extends ImplEdge<S, T> implements DefaultEdge<S, T> { private S startNode; private T endNode; ... } //(provides custom extensions which rely on ImplDefaultNode) //custom implementation of interface derived and defined from DefaultNode public class ImplExtendedNode extends ImplDefaultNode implements ExtendedNode { ... private Set<DifferentImplExtendedNode1> nodeRels1; ... private Set<ImplExtendedEdge<ImplExtendedNode, DifferentImplExtendedNode2>> edgeRels1; //this works when called via create function in superclass through generic map lookup @Override public NodeOperations morphNodeRels1() { ... this.nodeSuppliers.put(i, DifferentImplExtendedNode1::new); ... } //this throws compiler error - //Cannot convert java.lang.Object to ImplExtendedNode //It only works if the first generic type is ImplNode, or ImplDefaultNode I think) @Override public EdgeOperations morphEdgeRels1() { ... this.edgeSuppliers.put(i, ImplExtendedEdge<ImplExtendedNode, DifferentImplExtendedNode2>::new); ... } } ### Lobsters #### Node streams - a fractal of weird design ### Fefe #### Haha, mir hat jemand eine Mail von einer britischen ... Haha, mir hat jemand eine Mail von einer britischen Firma an ihre deutschen Kunden geforwardet. Inhalt: Keine Sorge, wir machen rechtzeitig eine Niederlassung in der EU auf. 
### StackOverflow #### Keras accuracy does not change I have a few thousand audio files and I want to classify them using Keras and Theano. So far, I generated a 28x28 spectrograms (bigger is probably better, but I am just trying to get the algorithm work at this point) of each audio file and read the image into a matrix. So in the end I get this big image matrix to feed into the network for image classification. In a tutorial I found this mnist classification code: import numpy as np from keras.datasets import mnist from keras.models import Sequential from keras.layers.core import Dense from keras.utils import np_utils batch_size = 128 nb_classes = 10 nb_epochs = 2 (X_train, y_train), (X_test, y_test) = mnist.load_data() X_train = X_train.reshape(60000, 784) X_test = X_test.reshape(10000, 784) X_train = X_train.astype("float32") X_test = X_test.astype("float32") X_train /= 255 X_test /= 255 print(X_train.shape[0], "train samples") print(X_test.shape[0], "test samples") y_train = np_utils.to_categorical(y_train, nb_classes) y_test = np_utils.to_categorical(y_test, nb_classes) model = Sequential() model.add(Dense(output_dim = 100, input_dim = 784, activation= "relu")) model.add(Dense(output_dim = 200, activation = "relu")) model.add(Dense(output_dim = 200, activation = "relu")) model.add(Dense(output_dim = nb_classes, activation = "softmax")) model.compile(optimizer = "adam", loss = "categorical_crossentropy") model.fit(X_train, y_train, batch_size = batch_size, nb_epoch = nb_epochs, show_accuracy = True, verbose = 2, validation_data = (X_test, y_test)) score = model.evaluate(X_test, y_test, show_accuracy = True, verbose = 0) print("Test score: ", score[0]) print("Test accuracy: ", score[1]) This code runs, and I get the result as expected: (60000L, 'train samples') (10000L, 'test samples') Train on 60000 samples, validate on 10000 samples Epoch 1/2 2s - loss: 0.2988 - acc: 0.9131 - val_loss: 0.1314 - val_acc: 0.9607 Epoch 2/2 2s - loss: 0.1144 - acc: 0.9651 - 
val_loss: 0.0995 - val_acc: 0.9673 ('Test score: ', 0.099454972004890438) ('Test accuracy: ', 0.96730000000000005) Up to this point everything runs perfectly, however when I apply the above algorithm to my dataset, accuracy gets stuck. My code is as follows: import os import pandas as pd from sklearn.cross_validation import train_test_split from keras.models import Sequential from keras.layers.convolutional import Convolution2D, MaxPooling2D from keras.layers.core import Dense, Activation, Dropout, Flatten from keras.utils import np_utils import AudioProcessing as ap import ImageTools as it batch_size = 128 nb_classes = 2 nb_epoch = 10 for i in range(20): print "\n" # Generate spectrograms if necessary if(len(os.listdir("./AudioNormalPathalogicClassification/Image")) > 0): print "Audio files are already processed. Skipping..." else: print "Generating spectrograms for the audio files..." ap.audio_2_image("./AudioNormalPathalogicClassification/Audio/","./AudioNormalPathalogicClassification/Image/",".wav",".png",(28,28)) # Read the result csv df = pd.read_csv('./AudioNormalPathalogicClassification/Result/result.csv', header = None) df.columns = ["RegionName","IsNormal"] bool_mapping = {True : 1, False : 0} nb_classes = 2 for col in df: if(col == "RegionName"): a = 3 else: df[col] = df[col].map(bool_mapping) y = df.iloc[:,1:].values y = np_utils.to_categorical(y, nb_classes) # Load images into memory print "Loading images into memory..." 
X = it.load_images("./AudioNormalPathalogicClassification/Image/",".png") X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0) X_train = X_train.reshape(X_train.shape[0], 784) X_test = X_test.reshape(X_test.shape[0], 784) X_train = X_train.astype("float32") X_test = X_test.astype("float32") X_train /= 255 X_test /= 255 print("X_train shape: " + str(X_train.shape)) print(str(X_train.shape[0]) + " train samples") print(str(X_test.shape[0]) + " test samples") model = Sequential() model.add(Dense(output_dim = 100, input_dim = 784, activation= "relu")) model.add(Dense(output_dim = 200, activation = "relu")) model.add(Dense(output_dim = 200, activation = "relu")) model.add(Dense(output_dim = nb_classes, activation = "softmax")) model.compile(loss = "categorical_crossentropy", optimizer = "adam") print model.summary() model.fit(X_train, y_train, batch_size = batch_size, nb_epoch = nb_epoch, show_accuracy = True, verbose = 1, validation_data = (X_test, y_test)) score = model.evaluate(X_test, y_test, show_accuracy = True, verbose = 1) print("Test score: ", score[0]) print("Test accuracy: ", score[1]) AudioProcessing.py import os import scipy as sp import scipy.io.wavfile as wav import matplotlib.pylab as pylab import Image def save_spectrogram_scipy(source_filename, destination_filename, size): dt = 0.0005 NFFT = 1024 Fs = int(1.0/dt) fs, audio = wav.read(source_filename) if(len(audio.shape) >= 2): audio = sp.mean(audio, axis = 1) fig = pylab.figure() ax = pylab.Axes(fig, [0,0,1,1]) ax.set_axis_off() fig.add_axes(ax) pylab.specgram(audio, NFFT = NFFT, Fs = Fs, noverlap = 900, cmap="gray") pylab.savefig(destination_filename) img = Image.open(destination_filename).convert("L") img = img.resize(size) img.save(destination_filename) pylab.clf() del img def audio_2_image(source_directory, destination_directory, audio_extension, image_extension, size): nb_files = len(os.listdir(source_directory)); count = 0 for file in 
os.listdir(source_directory): if file.endswith(audio_extension): destinationName = file[:-4] save_spectrogram_scipy(source_directory + file, destination_directory + destinationName + image_extension, size) count += 1 print ("Generating spectrogram for files " + str(count) + " / " + str(nb_files) + ".") ImageTools.py import os import numpy as np import matplotlib.image as mpimg def load_images(source_directory, image_extension): image_matrix = [] nb_files = len(os.listdir(source_directory)); count = 0 for file in os.listdir(source_directory): if file.endswith(image_extension): with open(source_directory + file,"r+b") as f: img = mpimg.imread(f) img = img.flatten() image_matrix.append(img) del img count += 1 #print ("File " + str(count) + " / " + str(nb_files) + " loaded.") return np.asarray(image_matrix) So I run the above code and recieve: Audio files are already processed. Skipping... Loading images into memory... X_train shape: (2394L, 784L) 2394 train samples 1027 test samples -------------------------------------------------------------------------------- Initial input shape: (None, 784) -------------------------------------------------------------------------------- Layer (name) Output Shape Param # -------------------------------------------------------------------------------- Dense (dense) (None, 100) 78500 Dense (dense) (None, 200) 20200 Dense (dense) (None, 200) 40200 Dense (dense) (None, 2) 402 -------------------------------------------------------------------------------- Total params: 139302 -------------------------------------------------------------------------------- None Train on 2394 samples, validate on 1027 samples Epoch 1/10 2394/2394 [==============================] - 0s - loss: 0.6898 - acc: 0.5455 - val_loss: 0.6835 - val_acc: 0.5716 Epoch 2/10 2394/2394 [==============================] - 0s - loss: 0.6879 - acc: 0.5522 - val_loss: 0.6901 - val_acc: 0.5716 Epoch 3/10 2394/2394 [==============================] - 0s - loss: 0.6880 - acc: 
0.5522 - val_loss: 0.6842 - val_acc: 0.5716 Epoch 4/10 2394/2394 [==============================] - 0s - loss: 0.6883 - acc: 0.5522 - val_loss: 0.6829 - val_acc: 0.5716 Epoch 5/10 2394/2394 [==============================] - 0s - loss: 0.6885 - acc: 0.5522 - val_loss: 0.6836 - val_acc: 0.5716 Epoch 6/10 2394/2394 [==============================] - 0s - loss: 0.6887 - acc: 0.5522 - val_loss: 0.6832 - val_acc: 0.5716 Epoch 7/10 2394/2394 [==============================] - 0s - loss: 0.6882 - acc: 0.5522 - val_loss: 0.6859 - val_acc: 0.5716 Epoch 8/10 2394/2394 [==============================] - 0s - loss: 0.6882 - acc: 0.5522 - val_loss: 0.6849 - val_acc: 0.5716 Epoch 9/10 2394/2394 [==============================] - 0s - loss: 0.6885 - acc: 0.5522 - val_loss: 0.6836 - val_acc: 0.5716 Epoch 10/10 2394/2394 [==============================] - 0s - loss: 0.6877 - acc: 0.5522 - val_loss: 0.6849 - val_acc: 0.5716 1027/1027 [==============================] - 0s ('Test score: ', 0.68490593621422047) ('Test accuracy: ', 0.57156767283349563) I tried changing the network, adding more epochs, but I always get the same result no matter what. I don't understand why I am getting the same result. Any help would be appreciated. Thank you. Edit: I found a mistake where pixel values were not read correctly. I fixed the ImageTools.py below as: import os import numpy as np from scipy.misc import imread def load_images(source_directory, image_extension): image_matrix = [] nb_files = len(os.listdir(source_directory)); count = 0 for file in os.listdir(source_directory): if file.endswith(image_extension): with open(source_directory + file,"r+b") as f: img = imread(f) img = img.flatten() image_matrix.append(img) del img count += 1 #print ("File " + str(count) + " / " + str(nb_files) + " loaded.") return np.asarray(image_matrix) Now I actually get grayscale pixel values from 0 to 255, so now my dividing it by 255 makes sense. However, I still get the same result. 
#### How to get label ids in Keras when training on multiple classes? I'm using flow_from_directory to get the training set from a folder with the following structure: train class1 class2 class3 ... The generator is called as follows: train_generator = train_datagen.flow_from_directory( train_data_dir, target_size=(img_height, img_width), batch_size=32, class_mode='categorical') I am not setting the argument classes, but I was expecting to get the labels in alphabetical order. classes: optional list of class subdirectories (e.g. ['dogs', 'cats']). Default: None. If not provided, the list of classes will be automatically inferred (and the order of the classes, which will map to the label indices, will be alphanumeric). However, when I classify the training images (for checking which labels are being returned), I don't get any specific ordering. The training goes well (accuracy of ~85%), and there is a consistency with the output labels when classifying images from the same class. How can I infer the label numbers generated by flow_from_directory and map them to the classes? ### QuantOverflow #### Reference Request: Trader Replication I am looking for any reference where the following problem was addressed: given the list of trades of a trader teach an AI to replicate that trader's strategy. There are several well-known results on hedge fund replication (usually factor based) and methods focusing on the replication of the return distribution (usually by stochastic optimization) but I am looking for a more "active" replication where the machine learns to trade as a trader. The main difference is that in my case I would know all the historical trades which is usually not known with other approaches. ### CompsciOverflow #### Why is Hamiltonian Path and graph coloring np complete and shortest path p when the former can also be solved using DFS recursively? 
NP is a complexity class that represents the set of all decision problems for which the instances where the answer is "yes" have proofs that can be verified in polynomial time. But hamiltonian path can also be deduced by recursively traversing dfs. ### StackOverflow #### text mining (URLs) I have a list of URLs (links). I show this list to the user, User can deselect (turn off) some Urls, I want create based learning and automatically deselect other urls who have commun text upper than 90%. can any one please give me algorithm can do that #### How can I extract to see each result of Classify Cross-Validation in weka? I use weka for classify tree.J48 and select 10 Folds Cross-validation,so the final reult is show calculate average result of 10folds,Can I extract to see result each folds? ### QuantOverflow #### Help with amortisation table in Excel VBA [on hold] I am a beginner with VBA. I am trying to create an amortisation table where the interest rate used depends on two inputs which will be provided by the user. For example if X=2; and Y=3, then interest rate = 5%. Values of X, Y and interest rate are already set in a 4 X 6 matrix. I am confused as to how to tell VBA what to do. Is an array the best way to go? If so how do I use it? The IF function could work but it would take up a lot of time. Is there a more efficient way of doing it? Thank you. ### Planet Emacsen #### Irreal: Vi and Emacs Without the Religion Chris Patti over at Blind Not Dumb has gathered his courage and written a piece on vi versus Emacs. He approaches the subject, as the title suggests, without the usual religious fervor. His take, which is hard to argue with, is that the best editor depends on what you are trying to do. If you want to edit text as quickly and efficiently as possible then vi/Vim is probably the editor for you. Be aware, though, that Vim is an editor not an IDE. Patti says that efforts to bolt on IDE-like features rarely end well. 
Either the extension doesn't work well or it destabilizes Vim. Emacs, on the other hand, is more of a programming environment that is highly optimized for dealing with text. That means that you can not only edit but do other—usually but not always—text-oriented tasks in the same environment. That gives rise to the familiar—to Emacsers—tendency to move everything possible inside Emacs. The other advantage of Emacs is that you can customize it to operate in almost any conceivable way. Vim, of course, is also customizable but not nearly to the same extent. Patti's post is a balanced recounting of the benefits of each editor and may help n00bs trying to decide which one to use to pick the editor best suited for them. I'd bet that almost every Emacs/vi user knows and has used both. Many people start with one and switch to the other for some reason. From my point of view, I love using Emacs because I have adjusted it to enable a nearly frictionless workflow. Still, there are times when only vi/Vim is available so I'm glad to know both. ### Fefe #### Liebe Leser, setzt euch mal bitte stabil hin, bevor ... Liebe Leser, setzt euch mal bitte stabil hin, bevor ihr auf diesen Link klickt. Google hat mal geguckt, was die Briten gerade so googeln, seit die Brexit-Ergebnisse verkündet wurden. Hier ist die Liste: 1. What does it mean to leave the EU? 2. What is the EU? 3. Which countries are in the EU? 4. What will happen now that we've left the EU? 5. How many countries are in the EU? Ach du Scheiße ist das übel! JETZT googeln die das?! Bisschen spät! Der Einsender führt aus: Die Highlights bisher: +250% spike in "what happens if we leave the EU" in the past hour "What happens if we leave the EU?" Top questions on #EUref in Scotland since polls closed +680% spike in searches for "Move to Gibraltar" in London since polls closed #EUref "What will happen if we leave the EU?" 
Top questions on the #EUref in Northern Ireland since first results announced "What is the EU referendum?" Top questions on #EUref in Birmingham since polls closed (Anm. d. Red.: OMGWTF!?!?) +100% spike in UK searches for "getting an Irish passport" after #Brexit vote #### Ein Einsender kommentiert:Von der demographischen Verteilung ... Ein Einsender kommentiert: Von der demographischen Verteilung her, war die überwiegende Mehrheit der Brexitbeführworter Rentner und finanziell Schwache; 60% der unter 25-jährigen waren für einen Verbleib. Wenn es zu dem zu erwartenden wirtschaftlichen Abschwung kommt (und sei es nur weil die ausländischen Investionen wegen der Unsicherheit heruntergefahren werden), dürfte Großbritannien ein ganz anderes Immigrations- problem bekommen: nämlich ein negatives. Ähnliches hat man ja schon in Spanien sehen können, eventuell sprechen die Krankenschwestern in den hiesigen Krankenhäusern in ein paar Jahren englisch statt spanisch (ich war in den letzen ein paar mal in einem KH und da ist das wirklich aufgefallen) #### Einige Einsender fragen, ob man Brexit nicht im Kontext ... Einige Einsender fragen, ob man Brexit nicht im Kontext von TTIP sehen müsse. Dass das der Versuch sei, sich seine Souveränität zu bewahren, wenn irgendwelche intransparenten EU-Funktionäre im geheimen Kämmerlein geheime Geheimabkommen mit geheimen Gesprächspartnern aus anderen Ländern ausgeheimnissen. Meiner Wahrnehmung nach war das kein Thema bei Brexit, aber vielleicht habe ich das bloß übersehen. ### infra-talk #### TDD in a REPL, Continued: Read-Eval-PrettyPrint-Loop Last December, I wrote about using F#’s interactive REPL to facilitate TDD. Since then, enough interesting developments have happened that I think the topic is worth revisiting. ## Pretty-Printing Output REPLs are great, but sometimes the “Print” step can leave a little to be desired. 
F# has pretty good default REPL formatting for records, lists, etc., but not necessarily for other types, like traditional classes. You can add your own REPL format function for a type, but you’re still limited to string output. For large data structures, being able to expand and collapse the output or view a summary would be very helpful. Luckily, just such a capability was recently added to the F# REPL–provided you are using the Atom editor. The Ionide F# plugin for Atom was updated to make it support rendering objects to the F# interactive output window as HTML. Basically, instead of providing a formatting function of (YourType -> string) where the string is raw output to display, you provide a function of (YourType -> string) where the output is presumed to be HTML. Being able to display your data with the standard web technologies (HTML, CSS, and JavaScript) means tons of cool existing frameworks can be brought to bear in your REPL. Check out this repo for some examples of creating tables and graphs with FsLab. ## Better-looking Test Results The update to Ionide helped me address my previously stated goal of creating prettier test output. Since then, I’ve updated my previous TDD script to dramatically increase the ease of reviewing test output. I’ve gone from simple text output that is all in the same color and size to this: In the updated script, I’ve built very lightweight data structures that resemble what a test runner would normally do. Then I turned those structures into some simple HTML and CSS. Eventually, it would be awesome to leverage an existing test runner’s HTML output, but that’s a project for another day. I’ve created a Github repo for others who would like to use my TDD script for their F# development. I considered creating a NuGet package for these scripts, but I suspect the desire for consumers to tweak the code will be high enough that just copying or forking the code will be easier. 
Because there’s nothing to compile, you could alternatively reference the project as a Git submodule to easily get the latest updates. ## Choice of Test Framework I also have an update on one of my caveats from last time. Previously, I was experiencing crashes with the xUnit console runner on Mono. However, because the NUnit assertions do not work without running through the NUnit framework, I had to use xUnit to do TDD in the F# REPL. Even though FsUnit mostly papered over those differences, it still really bothered me. Lately, though, I discovered the source of my crashes, and it is easy to work around. If you load a project into the console runner, which is of type Exe and not Library, you will occasionally get random crashes. So I just changed my main project to a Library and gave it a thin, untested executable wrapper. ## Pretty and Fast I love being able to change code in a compiled language and see easy-to-read test output instantaneously. The best part is how well it scales. You don’t pay the penalty for compiling the entire project to run one test—or even the cost of starting up the runtime. You only have to send a chunk of the script into an already-running instance of the runtime. ## Going Forward I’d like to see the Ionide changes make their way over to VS Code. Because VS Code has decent F# debugging and a halfway decent Vim plugin, it is within striking distance of being a one-stop shop for F# development for me on OS X. I’d also like to try and integrate the output formatters for existing test frameworks when possible. xUnit is always going to know how to format their results better than I do. The trick will be getting their formatting code to cooperate in an environment that’s nothing like their test runner. Being able to make incremental but significant progress in tooling—as I hope I have done by prettifying the output of REPL-based TDD—highlights one of my favorite things about open source software communities. 
Any contributor can move the ball forward more-or-less independently, and everyone reaps the benefits. Where do you want to make your next contribution to improving the process and practice of making software? The post TDD in a REPL, Continued: Read-Eval-PrettyPrint-Loop appeared first on Atomic Spin. ### Related Posts ### Lamer News #### Why Service Marketplace Websites Will See A Huge Rise In 2016? ### CompsciOverflow #### Width of a String/Line in an Image I have been looking for a method to find the width of a line. I found the solution(link: http://www.mathworks.com/matlabcentral/answers/88645-average-width-of-lines), "sum up the image to get the area, then skeletonize the image and sum up that image to get the total length.Then divide the area by the length." Can anyone please explain the following? 1. what is summing up to get image area? 2. what is summing up skeletoniized image to get length? I did not get this. I have to find the thickness of a string from a given image. I can take the image of any dimension and quality I want. How do i measure the thickness of a string? That is what I was trying to resemble as width of a line. Essentially, thickness of a string can be thought as width of a line, I think. Moreover, I can not assign any unit to my measurement i.e., m/cm/mm. So I have to measure some sort of ratio to represent the thickness and reach a decision. My outcome should be Yes or No based on the thickness. For example, if the thickness is greater that a certain value I would not take the string to process. The string can be made of anything. ### StackOverflow #### What is maximum likelihood estimation in logistic regression? [migrated] Can you please explain in simple way. Is it so important in logistic regression? ### Fefe #### Liebe Leser, die Umfallerpartei (früher auch unter ... 
Liebe Leser, die Umfallerpartei (früher auch unter dem Namen "SPD" bekannt) wird sich warm anziehen müssen: Nigel Farage (UKIP) kippt noch vor Ende der Auszählung um, kassiert sein wichtigstes Brexit-Wahlkampfversprechen wieder ein. Das war nämlich eine glatte Lüge mit den 350 Millionen. John Oliver hatte das neulich bei Last Week Tonight ausführlich erklärt. #### Im Brexit-Newsticker der Tagesschau gibt es zwei Nuggets, ... Im Brexit-Newsticker der Tagesschau gibt es zwei Nuggets, die ich euch mal hier zitieren will: Durch den Kursverfall des Pfund ist Großbritannien nach Angaben der Denkfabrik London Economics nicht mehr die fünftgrößte Volkswirtschaft der Welt. "Das Pfund ist so stark gefallen, dass uns Frankreich überholt hat", erklärte das Institut. Ausgerechnet der Erbfeind Frankreich! Karma is a bitch! Und dann gab es da noch diese grandiose Reaktion aus Russland: Kremltreue russische Politiker haben in ersten Reaktionen das britische Votum für einen Austritt aus der EU begrüßt. Für das britische Volk sei es ein großer Erfolg, sagte der nationalistische Vize-Parlamentschef Wladimir Schirinowski von der Partei LDPR. "Das ländliche, provinzielle, arbeitende Großbritannien hat Nein gesagt zu der Union, die von der Finanzmafia, Globalisten und anderen geschaffen wurde", sagte er der Agentur Interfax zufolge. Ob der da die Wiederkehr des Kommunismus wittert? Das Proletariat erhebt sich gegen die Bourgeoisie? #### Eine Sache, die bei Brexit ja so klar wird wie sonst ... Eine Sache, die bei Brexit ja so klar wird wie sonst lange nicht mehr: Die Jungen (18-24) haben mit überwältigender Mehrheit gegen Brexit gestimmt, aber sind von den ängstlichen alten Säcken überstimmt worden. Die Jungen werden damit aber viel länger leben müssen als die Alten. Vielleicht sollte man mal darüber nachdenken, ob man Stimmen irgendwie nach Restlebenserwartung gewichtet. 
Update: Hier ist ein Leserbrief an die Financial Times, der das schön auf den Punkt bringt (leider nur als Screenshot wegen Paywall): Secondly, the younger generation has lost the right to live and work in 27 other countries. We will never know the full extent of the lost opportunities, friendships, marriages and experiences we will be denied. Freedom of movement was taken away by our parents, uncles, and grandparents in a parting blow to a generation that was already drowning in the debts of our predecessors. #### Und hier ist noch ein Nugget aus der "Welt":Die Präsidenten ... Und hier ist noch ein Nugget aus der "Welt": Die Präsidenten der EU-Institutionen machten zugleich deutlich, dass Großbritannien bei künftigen Abkommen als Drittland behandelt wird. Befürworter eines britischen EU-Austritts hatten im Wahlkampf gesagt, dass das Königreich Sonderkonditionen in Brüssel herausschlagen könne. Und im Hintergrund hört man ein *plopp*, als die Träume der Briten zerplatzen. ### CompsciOverflow #### How to prove greedy algorithm is correct I have a greedy algorithm that I suspect might be correct, but I'm not sure. How do I check whether it is correct? What are the techniques to use for proving a greedy algorithm correct? Are there common patterns or techniques? I'm hoping this will become a reference question that can be used to point beginners to; hence its broader-than-usual scope. Please take care to give general, didactically presented answers that are illustrated by at least one example but nonetheless cover many situations. Thanks! #### How many pages does the page table require? I have found this exercise on the Internet but I'm having problems with sections 4 and 5, because I don't understand where the solutions come from. Example: Mapping VAs to PAs Suppose - Virtual memory of 2^32 bytes - Physical memory of 2^24 bytes - Page size is 2^10 (1K) bytes 1. How many pages can be stored in physical memory at once? 2^(24-10) = 2^14 2. 
How many entries are there in the page table? 2^22 3. How many bits are necessary per entry in the page table? (Assume each entry has PPN, resident bit, dirty bit) 16 4. How many pages does the table require? 2^23 bytes = 2^13 pages 5. What is the largest fraction of VM that might be resident? 1/(2^8) 6. A portion of the page table is given to the left. What is the physical address for virtual address 0x1804? VPN = 6 --> PPN = 2 --> PA = 0x804 $$\begin{array}{c|c|c|c|} \text{VPN} & \text{D} & \text{R} & \text{PPN} \\ \hline \text{0} & 0 & 0 & 7 \\ \hline \text{1} & 1 & 1 & 9 \\ \hline \text{2} & 1 & 0 & 0 \\ \hline \text{3} & 0 & 0 & 5 \\ \hline \text{4} & 1 & 0 & 5 \\ \hline \text{5} & 0 & 0 & 3 \\ \hline \text{6} & 1 & 1 & 2 \\ \hline \text{7} & 1 & 0 & 4 \\ \hline \text{8} & 1 & 0 & 1 \\ \hline \text{...} & ... & ... & ... \\ \hline \end{array}$$ Could you help me with these two exercises? Thank you in advance #### Give a regular expression for L = {set of all strings in which number of a's are multiples of 3} ∑={a,b,c} [duplicate] This question already has an answer here: Give a regular expression for L = {set of all strings in which number of a's are multiples of 3} ∑={a,b,c} #### What is the complexity of this algorithm? The algorithm is as follows: a = rand % a random number between 0 and 1 b = a while b == a b = rand end Here rand is a function that returns a random number, generated uniformly, between 0 and 1. Let us say that this is the MATLAB function rand. What is the time complexity of this algorithm? It looks like the best and average complexities are $O(1)$ and the worst complexity is unbounded. #### Can we say DFA is more efficient than NFA? I just started reading about theory of computation. If we compare which is more powerful (in accepting strings), both are the same. But what about efficiency? DFA will be fast compared to NFA, since it has only one outgoing edge & there will be no ambiguity. 
But in case of NFA we have to check all possible cases & that surely takes time. So can we say DFA is more efficient than NFA? But the other part of my brain is also thinking that NFA exists only in theory, so we cannot compare its efficiency with DFA. #### Converting pseudo code to a recurrence relation equation? [duplicate] This question already has an answer here: The following is pseudo code and I need to turn it into a recurrence relation that would possibly have either an arithmetic, geometric or harmonic series. Pseudo code is below. I have so far T(n) = T(n-5) + c #### What problem to give to check a person's problem-solving capability? [on hold] Suppose, you are a lead of a team who solves problems through the use of different problem-solving techniques and algorithms. You were tasked to pick a new member for your team from a pool of junior candidates. You are only allowed to give one question to the candidates. What would be this one question (preferably a problem) that you will give to the candidates to check their capabilities with regards to problem-solving? And under what criteria would you follow in picking the 'best' candidate? Since this is posted here, I'm hoping for solutions/problems that require knowledge of Computer Science (algorithms, problems, etc.) #### Project to nearest point in convex polytope Is there a reasonably efficient algorithm for the following task? Input: a point $x \in \mathbb{R}^d$; a convex polytope $\mathcal{C} \subseteq \mathbb{R}^d$ Find: a point $y \in \mathcal{C}$ that is as close to $x$ as possible Assume that $\mathcal{C}$ is specified by a collection of linear inequalities, that the dimension $d$ is fairly high, and "close" is measured using $L_2$ distance, i.e., we want to minimize $||x-y||_2$. Is there an efficient algorithm for this problem? I can see how to solve this in polynomial time using linear programming if "close" were measured using $L_1$ or $L_\infty$ distance, but I'm more interested in the $L_2$ distance metric. 
I keep thinking there might be some algorithm based on identifying the set of inequalities that are violated by$x$and then doing something, but I can't quite put together a working algorithm. I found the following paper which describes an algorithm (exponential-time in the worst case but often efficient, like the simplex method): Philip Wolfe. Finding the Nearest Point in a Polytope. Mathematical Programming, vol 11, 1976, pp.128--149. However, that paper requires$\mathcal{C}$to be presented as a list of vertices rather than a list of inequalities, so it can't be used for my problem. (Converting from inequalities to a set of vertices will cause an exponential blowup; typically the number of vertices is exponential in the number of inequalities.) ### Fred Wilson #### Feature Friday: iPhone Hearing Test Last week in a blog post, I wrote: My daughter’s iPhone can’t deliver a strep test to her, yet. But it can deliver an eye exam and a hearing test. So I thought I would showcase one of these iPhone based medical tests on feature friday today. If you download an app on your iPhone called “Mimi Hearing Test” and install it, you can test your hearing with your earbuds. I did that this morning. It is just like the hearing tests you do at the doctor’s office. As I suffer from mild hearing loss, I have done this test a bunch in doctors offices over the years. You sit in a quiet spot and wait for beeps in your right or left ear and when you hear them you press the right or left button. The quick test lasts about three or four minutes and when you are done you get results like this: and You then store your profile in the system and are then offered additional services: And, once you’ve done an in-depth test and stored that, you can use the Mimi Music app to enhance the music listening on your iPhone. I assume that is just the start of a suite of enhanced audio apps and products that Mimi intends to offer. 
Full disclosure – The Gotham Gal is an investor in Mimi and therefore I have a financial interest in the company too. ### StackOverflow #### how to train neural network with probabilistic input Hello and thanks for helping. My question is a long-standing problem that I have been trying to tackle: • How do we train a neural network if the input is a probability rather than a value? To make it more intuitive: • Let's say we have 6 features and the value they may take is 1 or -1 for each. • Their value is determined probabilistically, such that feature 1 can be 1 with 60% probability or -1 with 30% probability. How do we train the network if in each trial, we may get an input value in accordance with the probability distribution of each feature? ### Fefe #### Kurze Durchsage des Bundesverfassungsgerichtes:Die ... Kurze Durchsage des Bundesverfassungsgerichtes: Die Kundgabe der Buchstabenkombination „ACAB“ im öffentlichen Raum ist vor dem Hintergrund der Freiheit der Meinungsäußerung nicht ohne weiteres strafbar. #### Oh wow. Die Briten haben für Brexit gestimmt.Für ... Oh wow. Die Briten haben für Brexit gestimmt. Für mich ist das wie die Trump-Präsidentschaft. Auf rationaler Ebene war klar, dass das geschehen kann. Aber auf emotionaler Ebene fühlte es sich völlig ausgeschlossen an. Jetzt stellen sich gleich mehrere Fragen. Erstens: Wird das Parlament das Ergebnis überhaupt als bindend akzeptieren? Müssen tun sie das nicht. Auf der anderen Seite reden wir hier von einer Wahlbeteiligung von 72,2 Prozent, d.h. da haben so viele Millionen von Menschen teilgenommen, dass man da schon eine große Schranke argumentativ überwinden müsste, um das einfach zu ignorieren. Zweitens: Wo gehen jetzt die ganzen Finanz-Zocker hin? Nach Frankfurt? Drittens: Wer wird das neue U-Boot der USA in der EU? Ich tippe auf Polen. Oh, besonders entsetzt sind die Schotten, die gerne in der EU geblieben wären. Die werden jetzt wohl ein neues Unabhängigkeitsreferendum machen.
Auch Gibraltar hatte sich sehr klar für einen Verbleib in der EU ausgesprochen. Die Spanier nehmen das als Wink mit dem Zaunpfahl und melden Interesse an Gibraltar an. Die nächste Frage ist, ob wir gerade das Ende der Konservativen Partei erlebt haben. Dieser Brexit-Sieg war vor allem ein großer Sieg für die Euro-Skeptiker UKIP um den Schleimbolzen Nigel Farage. Natürlich sehen die anderen Rechtspopulisten das als Rückenwind. Geert Wilders zum Beispiel. Nordirland wollte übrigens auch klar in der EU bleiben. Ob die sich jetzt auch abspalten und zu Irland wechseln? #### Eine der Brexit-Fragen ist ja auch, wie sich die UK-Wirtschaft ... Eine der Brexit-Fragen ist ja auch, wie sich die UK-Wirtschaft jetzt entwickeln soll. Die haben ja erfolgreich praktisch alles eingestampft, was da mal produziert wurde. Oder fallen auf Anhieb jemandem britische Exportgüter ein, die man hier im Supermarkt findet? Walkers Shortbread und Whisky zählen nicht, die kommen aus Schottland. Hier ist eine Übersicht. Über die Hälfte der Exporte der Briten gehen in die EU. Wenn die EU Zölle auf Güter aus Großbritannien erhebt, wäre das verheerend. Umgekehrt wäre es natürlich auch doof für Deutschland, wenn die Briten Zölle auf Importe aus der EU erheben würden. Mein erster Gedanke war, dass die Briten das mit den Zölle machen werden müssen, denn ihre Handelsbilanz ist negativ. Die kaufen mehr als sie verkaufen. Wenn sie das einrenken wollen, müssen sie die Inlandsquote verbessern. Dafür sorgen, dass die Leute wieder selber Dinge herstellen, und dass sie bevorzugt Güter aus dem eigenen Land kaufen. Das Mittel dafür wären Zölle auf Importe. Aber dann würde natürlich auch die EU Zölle auf Güter aus Großbritannien verhängen. Das ist alles nicht so einfach. Der Grund, warum ich das anspreche: UKIP findet, die EU sollte aus Eigeninteresse den Briten zollfreien Zugang zum EU-Binnenmarkt eröffnen. 
Begründung: Neil Hamilton, from UKIP’s national executive committee, told BBC Radio 4 today that it was in the EU’s interest to make that happen following the vote to leave the bloc “because we have a huge trade deficit with them.” Mit anderen Worten: Wenn die Deutschen jemals ihre Schulden zurückgezahlt kriegen wollen, müssen sie uns zollfrei exportieren lassen. ### CompsciOverflow #### Distance vector VS Link state routing Question Consider the following three statements about link state and distance vector routing protocols, for a large network: [S1] The computational overhead in link state protocols is higher than in distance vector protocols. [S2] A distance vector protocol (with split horizon) avoids persistent routing loops, but not a link state protocol. [S3] After a topology change, a link state protocol will converge faster than a distance vector protocol. Which one of the following is correct about S1, S2, and S3? On this quiz, "S1 and S3 are true, but S2 is false" is given as the answer. I am OK with S2 and S3, but why is S1 true, since distance vector routing also involves a considerable amount of overhead in the construction of the routing table? I have gone through standard books like Tanenbaum's Computer Networks and read some articles on routing but still I am unable to draw S1 as a conclusion. It will be really helpful if someone could explain why S1 is true. ### StackOverflow #### Python - text mining - TypeError: __hash__ method should return an integer I am working on a classification problem in Python. Fact is, I'm not good yet in Python. So I have had the same problem for a long time now and I don't know how to fix it.
I hope you could help me :) This is my code: tableau = pandas.DataFrame({'Exigence':exigence,'Résumé':resume}) df2, targets = encode_target(tableau,"Exigence") features = list(df2.columns[:4]) for line in resume: terms = prep.ngram_tokenizer(text=line) mx.add_doc(doc_id='some-unique-identifier', doc_class=df2["Target"], doc_terms=terms, frequency=True, do_padding=True) And I have this error: objects are mutable, thus they cannot be hashed Traceback (most recent call last): File "<ipython-input-9-072e9c71917a>", line 7, in <module> do_padding=True) File "C:\Users\nouguierc\AppData\Local\Continuum\Anaconda3\lib\site- packages\irlib\matrix.py", line 222, in add_doc if doc_class in self.classes: TypeError: __hash__ method should return an integer When I go to line 222 of matrix.py I see this: if doc_class in self.classes: self.classes[doc_class].add(my_doc_terms) The function containing those lines is: def add_doc(self, doc_id = '', doc_class='', doc_terms=[], frequency=False, do_padding=False): ''' Add new document to our matrix: doc_id: Identifier for the document, eg. file name, url, etc. doc_class: You might need this in classification. doc_terms: List of terms you got after tokenizing the document. frequency: If true, term occurences is incremented by one. Else, occurences is only 0 or 1 (a la Bernoulli) do_padding: Boolean. Check do_padding() for more info. ''' # Update list of terms if new term seen. # And document (row) with its associated data. my_doc_terms = SuperList() for term in doc_terms: term_idx = self.terms.unique_append(term) #my_doc_terms.insert_after_padding(self.terms.index(term)) if frequency: my_doc_terms.increment_after_padding(term_idx,1) else: my_doc_terms.insert_after_padding(term_idx,1) self.docs.append({ 'id': doc_id, 'class': doc_class, 'terms': my_doc_terms}) # Update list of document classes if new class seen.
# self.classes.unique_append(doc_class) if doc_class in self.classes: self.classes[doc_class].add(my_doc_terms) else: self.classes[doc_class] = my_doc_terms if do_padding: self.do_padding() What do you think about my problem? Célia :) ### TheoryOverflow #### Characterisation of P in terms of register machines It is a well-known result that Turing machines and random access machines (RAMs) can simulate each other with a polynomial slowdown. It is relatively straightforward to prove that indirect addressing and binary shift (or, equivalently, division by two) are actually redundant when working in polynomial time, i.e., that polynomial-time Turing machines are equivalent to polynomial-time Minsky-like register machines with a constant number of registers, augmented with constant-time addition and subtraction instructions (rather than just increment and decrement). This can be shown by encoding the sequence of symbols on the tape of the Turing machine as a single large integer stored in one of the registers; the individual symbols can be accessed by performing multiplications and divisions (implemented via repeated doubling). (Incidentally, register machines without constant-time addition and subtraction are provably slower than Turing machines on certain problems.) I have the feeling that this result must have been published a long time ago, but I seem unable to find it in the literature. Does anyone know a reference (possibly the first published one) for it? ### StackOverflow #### Which way is better to encapsulate a single unit of work in Node.js, OOP or functional? I have a complex business action, for example deleting a user account. It contains multiple connected steps, and has to keep track of some state between steps. What is a better approach for writing this action? I often see a more functional approach like the one below.
function someAction(someParam, anotherParam, callback) { async.waterfall([ step1, step2, step3, step4 ],callback ); function step1(p,cb){/**use someParam and anotherParam here via closure*/} function step2(p,cb){/**...*/} function step3(p,cb){/**...*/} function step4(p,cb){/**...*/} }; someAction('value', 1241, (err)=>{/**...*/}); What I don't like about this approach is that everything is defined within the scope of a single function (here someAction). I find a more object-oriented way to be a little more readable. The state and the stepX functions are not truly private - sometimes it is convenient for testing. function SomeAction(someParam, anotherParam){ //private state this._someParam = someParam; this._anotherParam = anotherParam; }; SomeAction.prototype._step1 = function(p, cb){ //use this._someParam and this._anotherParam }; SomeAction.prototype._step2 = function(p, cb){ //use this._someParam and this._anotherParam }; SomeAction.prototype._step3 = function(p, cb){ //use this._someParam and this._anotherParam }; SomeAction.prototype._step4 = function(p, cb){ //use this._someParam and this._anotherParam }; //public api SomeAction.prototype.execute = function(callback) { async.waterfall([ this._step1, this._step2, this._step3, this._step4 ],callback ) }; new SomeAction('value', 1241).execute((err)=>{/**...*/}) Is there any performance difference between them ? What is the recommended approach in Node.js ? Is it true that each time I callsomeAction in functional approach - all the stepX functions have to be defined from scratch ? ### QuantOverflow #### Reference for option pricing, binomial multi-period model using martingales and conditional expectations The title basically says it all. I am looking for a reference text on the pricing of options in a binomial multi-period model. It should be mathemathically rigorous using martingales and conditional expectations yet be self-contained as much as possible. ### StackOverflow #### How to properly debug OCaml code? 
Can I know how an experienced OCaml developer debugs his code? What I am doing is just using Printf.printf. It is too troublesome as I have to comment them all out when I need a clean output. How should I better control this debugging process? special annotation to switch those logging on or off? thanks #### How do I pass by reference in Scala? with a ListBuffer I want to pass an object in Scala by reference. The pseudo code is something like var list1 = List(//stuff in here) var list2 = List(//stuff in here).toBuffer list1.forEach(element => foo(element,list2) def foo(element,list2){ remove something from list 2 } so on each iteration of the forEach, list2 would be different :/ What would be the best way to do this on scala. Thanks :D ### TheoryOverflow #### Finding All Cliques of an Undirected Graph How can I list all cliques of an Undirected Graph ? (Not all maximal cliques, like the Bron-Kerbosch algorithm) ### StackOverflow #### visualizing hyperplane equation of SVM I have been trying to understand the SVM algorithm and i can not fully get the hyperplane equation. The equation is- w.x-b=0. What i understand(with lots of confusions) is- x is unknown set of all the vectors that constitutes the hyperplane and w is normal vector to that hyperplane. We do not know the w, we need to find the optimal w from training set. Now, we all know, if two vectors are perpendicular to each other then their dot product is zero. So, if w is normal to x then that means it should be w.x=0, but why it's saying w.x-b=0 or w.x=b?(normal and perpendicular is same thing, right?) In normal sense, what i understand if w.x=b, then w and x is not perpendicular and the angle between them is more or less than 90 degree. Another thing is, in most tutorials(even MIT professor in his lecture) it is being said, that x is projecting on w, but as I know if i want to take projection of x onto w then it will be x.w/|w| (without the direction of w), not only w.x . Am i right with this point? 
I think, i am missing something or misunderstanding something. Can anybody help me with this? #### Fisher Vector with LSH? I want to implement a system where given an input image, it returns a reasonable similar one (approximation is acceptable) in a dataset of (about) 50K images. Time performances are crucial. I'll use a parallel version of SIFT for obtaining a matrix of descriptors D. I've read about Fisher Vector (FV) (VLfeat and Yael implementations) as a learning and much more precise alternative to Bag of Features (BoF) for representing D as a single vector v. My question are: 1. What distance is used for FVs? Is it the Euclidean one? In that case I would use LSH in eucledian distance for quickly find approximate near neighbor of FVs. 2. There is any other FV efficient (in terms of time) C++ implementation? #### Tensorflow "map operation" for tensor? I am adapting the cifar10 convolution example to my problem. I'd like to change the data input from a design that reads images one-at-a-time from a file to a design that operates on an already-in-memory set of images. The original inputs() function looks like this: read_input = cifar10_input.read_cifar10(filename_queue) reshaped_image = tf.cast(read_input.uint8image, tf.float32) # Crop the central [height, width] of the image. resized_image = tf.image.resize_image_with_crop_or_pad(reshaped_image, width, height) In the original version, read_input is a tensor containing one image. I keep all my images in RAM, so instead of using filename_queue, I have one huge images_tensor = tf.constant(images), where images_tensor.shape is (something, 32, 32, 3). My question is very-very basic: what is the best way to apply some function (tf.image.resize_image_with_crop_or_pad in my case) to all elements of images_tensor? Iterating is problematic in tensorflow, with limited slices(TensorFlow - numpy-like tensor indexing). Is there a solution to achieving this using just one command? 
### CompsciOverflow #### When is the Computational complexity better? I minimize a function using genetic algorithms There are two methods to declare my chromosome • as integers • as bits (as certain discrete values) Which of the two coding methods are better regarding the Computational complexity ### Lobsters #### List Story Title and Tag suggestions In many cases, an author forgets to add the year of publication to a post and a while back, the feature was added to suggest title changes. If multiple identical suggestions accumulated, the change would happen automatically. There are other cases though, where a title change might make sense but where a common change is not obvious. What I propose is a way to see which title or tag suggestions there are for a post and an easy way to show your support for one of those suggestions or adding a new one. This would remove some comment noise where people discuss a title change, having to resort to really carefully examine char-by-char, how exactly their suggestions look like. What do you people think? ### StackOverflow #### How to reduce image resolution of an image in a realistic way Deep learning has shown quite remarkable progress in image super resolution lately. But I fear that the data generation method is not realistic enough, hence my question. Background: The basic premise of applying deep learning to the problem is as following: A low resolution image is generated from a high resolution image, using simple downsampling. The low resolution image or alternatively an interpolated image are fed into a neural network and a high resolution image is estimated. Question: The classic method of down-sampling the data produces some pretty significant results What I fear is that in this example, the neural network simply learns to estimate the inverse of this specific interpolation as best as it can. 
But since resolution degradation in real world in usually not consisting of a downsample and then an upsample with specific interpolation scheme, I believe this algorithm does not generalize as well to real world examples. So my question is: Is there a better, more realistic, way to reduce the resolution of an image than naive down-sampling, so we can better train and test the performance of the algorithm? #### Why does ALS.trainImplicit give better predictions for explicit ratings? I am using PredictionIO 0.9.6 and the Recommendation template for collaborative filtering. The ratings in my data set are numbers between 1 and 10. When I first trained a model with defaults from the template (using ALS.train), the predictions were horrible, at least subjectively. Scores ranged up to 60.0 or so but the recommendations seemed totally random. Somebody suggested that ALS.trainImplicit did a better job, so I changed src/main/scala/ALSAlgorithm.scala accordingly: val m = ALS.trainImplicit( // instead of ALS.train ratings = mllibRatings, rank = ap.rank, iterations = ap.numIterations, lambda = ap.lambda, blocks = -1, alpha = 1.0, // also added this line seed = seed) Scores are much lower now (below 1.0) but the recommendations are in line with the personal ratings. Much better, but also confusing. PredictionIO defines the difference between explicit and implicit this way: explicit preference (also referred as "explicit feedback"), such as "rating" given to item by users. implicit preference (also referred as "implicit feedback"), such as "view" and "buy" history. and: By default, the recommendation template uses ALS.train() which expects explicit rating values which the user has rated the item. source Is the documentation wrong? I still think that explicit feedback fits my use case. Maybe I need to adapt the template with ALS.train in order to get useful recommendations? Or did I just misunderstand something? 
#### Can anybody provide the classification of unsupervised learning algorithm? Please, provide the detailed classification of unsupervised machine learning algorithm. ### TheoryOverflow #### Cluster Edge Deletion on 2-trees Definitions: Cluster Edge Deletion problem is a graph modification problem, in which we want to remove the minimum number of edges such that the resulting graph does not contain a$P_3$as an induced subgraph (that is, the resulting graph is a disjoin union of cliques). The class of$k$-trees is defined as follows: a complete graph with$k$vertices is a$k$-tree; a$k$-trees with$n + 1$vertices$(n > k)$can be constructed from a$k$-tree$T$with$n$vertices by adding a vertex adjacent to all vertices of a$k$-clique of$T$, and only to these vertices. Question: Does exist a polynomial-time algorithm to solve Cluster Edge Deletion on 2-trees? My idea: Let$G$be the 2-tree. I transform$G$to a new graph$G'$where, each node of$G'$represents a$K_3$in$G$. And two nodes of$G'$are adjacent if and only if their corresponding$K_3$share a common vertex in$G$. Let$M$be the resulting graph after removing the minimum edges from$G$. I think that if always exist some graph$M$contains a 3-clique$H$which has a node with minimum degree in$G'$, then I can solve this problem on$G \backslash H$. For example as below figure, I choose node$x$with minimum degree 3 in$G'$, and after remove all vertices of clique$\{f,g,e\}$in$G$, I got an union disjoint of two clique$\{f,g,e\}$and$\{b,c,d\}$in$G$. ### Lobsters #### Trace objc method call for ios and mac app ### StackOverflow #### How to use UID when building a machine learnin pipeline with scala spark.ml? I'm trying to build a spark.ml pipeline with my own models. That means that I implements some classes that are extending Estimator and Model. Those classes require an UID as parameter. I made the assumption that the uid was just a name, so I put something like "pipeStage_1", "pipeStage_2" etc. 
I also made the assumption that when calling Estimator.fit(), the returned Model should inherit the uid from its Estimator creator. at the end of the day that doesn't seem to work: Param pipeStage_1__pramName does not belong to pipeStage_1. I can't find any tutorial or explanation about this. What am I supposed to do? ### CompsciOverflow #### Find orientation graph of undirected graph that mimimizes absolute difference of in-degree and out degree Here's a question from our uni's ICPC programming competition selections. I'm stating it in simpler terms here. Given an undirected graph, orient the edges of the graph in such a manner that the maximum absolute difference of out-degree and in degree of a node is minimised. An example of this is a chain 1-2-3 where answer is simply 1 as we can orient edges in a single same direction. Constraints: Vertices$\leq 10^5$, edges$\leq 10^5$The brute force is obvious, but I don't understand any other methods of solving this. I suspect the solution would be some what greedy although. ### StackOverflow #### Graphically, how does the non-linear activation function project the input onto the classification space? I am finding a very hard time to visualize how the activation function actually manages to classify non-linearly separable training data sets. Why does the activation function (e.g tanh function) work for non-linear cases? What exactly happens mathematically when the activation function projects the input to output? What separates training samples of different classes, and how does this work if one had to plot this process graphically? I've tried looking for numerous sources, but what exactly makes the activation function actually work for classifying training samples in a neural network, I just cannot grasp easily and would like to be able to picture this in my mind. ### CompsciOverflow #### How do computers keep track of time? How are computers able to tell the exactly correct time and date every time I switch it on? 
Whenever I close the computer (shut it down) all connections and processes inside stop. How is it that when I open the computer again it tells the exact correct time? Does the computer not shut down completely when I shut it down? Are there some processes still running in it? But then how does my laptop tell the correct time when I take out the battery (and thus forcibly stop all processes) and start it again after a few days? #### Generation of chords through a computer program [on hold] This question is about generating chords through a computer program. Let us say we have generated C, E and G notes using a program. Now if we generate a new note by simply adding these 3 notes the resulting note does not sound like a chord. This might be because in mechanical musical instruments sound comes from three separate hammers and the addition occurs in the air ? Does it mean we require 3 loud speakers to play the 3 notes simultaneously to get the chord effect? What is the way of generating chords using a single speaker ? Synthesizers seem to do that. ### StackOverflow #### How to decompose my sparse Matrix using SVDLIBC...? I am new to SVDLIBC. I tried to decompose a sample sparse matrix but it is not getting loaded, but throwing an error instead. I also tried the mentioned SVDLIBC example of a sparse matrix. Its throwing following error: praveen@praveen-Aspire-4739Z:~/Downloads/SVDLIBC$ ./svd -r st -w sth
Computing the SVD... SOLVING THE [A^TA] EIGENPROBLEM .....
NUMBER OF LANCZOS STEPS = 3 RITZ VALUES STABILIZED = 3
SINGULAR VALUES FOUND = 3 SIGNIFICANT VALUES = 3
ELAPSED CPU TIME = 0 sec. MULTIPLICATIONS BY A = 10 MULTIPLICATIONS BY A^T = 7

#### Symbolic regression constant fitting with TensorFlow

In my symbolic regression algorithm, the main cost is optimizing the constants of all the individuals in the population to get the costs. There are two challenges:

1. I need to be able to perform fitting on all the individuals as concurrently as possible on the GPU.
2. I need to be able to deal with overflow, etc., in a custom way; meaning, I need to be able to compose my models of essentially arbitrary functions. Specifically, I need to be able to wrap functions with "overflow protection" so that, say, x^x^x returns MAX_INT instead of some weird overflow error.

Is this possible with TensorFlow? How, roughly, would I accomplish the above?

### QuantOverflow

#### Pricing foreign currency bonds - which approach is more theoretically "sound"?

You own a fixed rate corporate bond in foreign currency (let's say JPY). Your domestic currency is USD. Which of these two approaches do you consider theoretically better?

1. Discount JPY cash flows using a yield comprised of a) JPY risk free rate + b) CDS spread for corresponding WAL of the security. Convert JPY NPV to USD using spot.

2. Convert JPY cash flows into USD using USDJPY fx forward rates. Discount using a USD yield for corresponding WAL and rating.

Each approach comes with a different value. Significantly enough that they cannot be considered equivalent. I believe the approach 1. is more sound. What are your thoughts? Any literature out there?

### arXiv Networking and Internet Architecture

#### Improved MDS-based Algorithm for Nodes Localization in Wireless Sensor Networks. (arXiv:1606.07389v1 [cs.NI])

With the recent development of technology, wireless sensor networks (WSN) are becoming an important part of many applications. Knowing the exact location of each sensor in the network is a very important issue. Therefore, the localization problem is a growing field of interest. Adding GPS receivers to each sensor node is a costly solution and inapplicable on nodes with limited resources. Additionally, it is not suitable for indoor environments. In this paper, we propose an algorithm for nodes localization in WSN based on the multidimensional scaling (MDS) technique. Our approach improves MDS by distance matrix refinement. Using extensive simulations we investigated our approach in detail regarding different network topologies, various network parameters and performance issues. The results from simulations show that our improved MDS (IMDS) algorithm outperforms the well-known MDS-MAP algorithm [1] in terms of accuracy.

#### A taxonomy of localization techniques based on multidimensional scaling. (arXiv:1606.07388v1 [cs.NI])

Localization in Wireless Sensor Networks (WSNs) has been a challenging problem in the last decade. The most explored approaches for this purpose are based on multidimensional scaling (MDS) technique. The first algorithm that introduced MDS for nodes localization in sensor networks is well known as MDS-MAP. Since its appearance in 2003, many variations of MDS-MAP have been proposed in the literature. This paper aims to provide a comprehensive survey of the localization techniques that are based on MDS. We classify MDS-based algorithms according to different taxonomy features and different evaluation metrics.

#### Application of Wireless Sensor Networks for Indoor Temperature Regulation. (arXiv:1606.07386v1 [cs.NI])

Wireless sensor networks take a major part in our everyday lives by enhancing systems for home automation, healthcare, temperature control, energy consumption monitoring, and so forth. In this paper we focus on a system used for temperature regulation for residential, educational, industrial, and commercial premises, and so forth. We propose a framework for indoor temperature regulation and optimization using wireless sensor networks based on ZigBee platform. This paper considers architectural design of the system, as well as implementation guidelines. The proposed system favors methods that provide energy savings by reducing the amount of data transmissions through the network. Furthermore, the framework explores techniques for localization, such that the location of the nodes can be used by algorithms that regulate temperature settings.

#### Nodes Localization in 3D Wireless Sensor Networks Based on Multidimensional Scaling Algorithm. (arXiv:1606.07385v1 [cs.NI])

In recent years, there has been a huge advancement in wireless sensor computing technology. Today, the wireless sensor network (WSN) has become a key technology for different types of smart environments. Nodes localization in WSN has arisen as a very challenging problem in the research community. Most of the applications for WSN are not useful without a priori known node positions. Adding GPS receivers to each node is an expensive solution and inapplicable for indoor environments. In this paper, we implemented and evaluated an algorithm based on the multidimensional scaling (MDS) technique for three-dimensional (3D) nodes localization in WSN using an improved heuristic method for distance calculation. Using extensive simulations we investigated our approach regarding various network parameters. We compared the results from the simulations with other approaches for 3D-WSN localization and showed that our approach outperforms other techniques in terms of accuracy.

#### Low Power Wide Area Networks: A Survey. (arXiv:1606.07360v1 [cs.NI])

Low Power Wide Area (LPWA) networks are attracting a lot of attention primarily because of their ability to offer affordable connectivity to the low-power devices distributed over very large geographical areas. In realizing the vision of the Internet of Things (IoT), LPWA technologies complement and sometimes supersede the conventional cellular and short range wireless technologies in performance for various emerging smart city and machine-to-machine (M2M) applications. This survey paper presents the design goals and the techniques, which different LPWA technologies exploit to offer wide-area coverage to low-power devices at the expense of low data rates. We survey several emerging LPWA technologies and the standardization activities carried out by different standards development organizations (e.g., IEEE, IETF, 3GPP, ETSI) as well as the industrial consortia built around individual LPWA technologies (e.g., LoRa Alliance, WEIGHTLESS-SIG, and DASH7 Alliance). We further note that LPWA technologies adopt similar approaches, thus sharing the same limitations and challenges. This paper expands on these research challenges and identifies potential directions to address them. While the proprietary LPWA technologies are already hitting the market with large nationwide roll-outs, this paper encourages an active engagement of the research community in solving problems that will shape the connectivity of tens of billions of devices in the next decade.

#### Conjunctive Query Answering via a Fragment of Set Theory (Extended Version). (arXiv:1606.07337v1 [cs.LO])

We address the problem of Conjunctive Query Answering (CQA) for the description logic $\dlssx$ ($\shdlssx$, for short) which extends the logic $\dlss$ with Boolean operations on concrete roles and with the product of concepts.

The result is obtained by formalizing $\shdlssx$-knowledge bases and $\shdlssx$-conjunctive queries in terms of formulae of the four-level set-theoretic fragment $\flqsr$, which admits a restricted form of quantification on variables of the first three levels and on pair terms. We solve the CQA problem for $\shdlssx$ through a decision procedure for the satisfiability problem of $\flqsr$. We further define a \ke\space based procedure for the same problem, more suitable for implementation purposes, and analyze its computational complexity.

#### Covariance estimation for vertically partitioned data in a distributed environment. (arXiv:1606.07336v1 [cs.DC])

The major sources of abundant data are constantly expanding with the available data collection methodologies in various applications - medical, insurance, scientific, bio-informatics and business. These data sets may be distributed geographically, and rich in both size and dimensions. To analyze these data sets to find out the hidden patterns, it is required to download the data to a centralized site, which is a challenging task in terms of the limited bandwidth available and is also computationally expensive. The covariance matrix is one of the methods to estimate the relation between any two dimensions. In this paper, we propose a communication efficient algorithm to estimate the covariance matrix in a distributed manner. The global covariance matrix is computed by merging the local covariance matrices using a distributed approach. The results show that it is exactly the same as the centralized method with good speed-up in terms of computation. The reason for the speed-up is the parallel construction of local covariances and the distribution of the cross-covariances among the nodes so that the load is balanced. The results are analyzed by considering the Mfeat data set on various partitions, which also addresses scalability.

#### Fault-Tolerant Adaptive Parallel and Distributed Simulation. (arXiv:1606.07310v1 [cs.DC])

Discrete Event Simulation is a widely used technique that is used to model and analyze complex systems in many fields of science and engineering. The increasingly large size of simulation models poses a serious computational challenge, since the time needed to run a simulation can be prohibitively large. For this reason, Parallel and Distributed Simulation techniques have been proposed to take advantage of multiple execution units which are found in multicore processors, clusters of workstations or HPC systems. The current generation of HPC systems includes hundreds of thousands of computing nodes and a vast amount of ancillary components. Despite improvements in manufacturing processes, failures of some components are frequent, and the situation will get worse as larger systems are built. In this paper we describe FT-GAIA, a software-based fault-tolerant extension of the GAIA/ART\IS parallel simulation middleware. FT-GAIA transparently replicates simulation entities and distributes them on multiple execution nodes. This allows the simulation to tolerate crash-failures of computing nodes; furthermore, FT-GAIA offers some protection against byzantine failures since synchronization messages are replicated as well, so that the receiving entity can identify and discard corrupted messages. We provide an experimental evaluation of FT-GAIA on a running prototype. Results show that a high degree of fault tolerance can be achieved, at the cost of a moderate increase in the computational load of the execution units.

#### Proceedings Fifteenth Conference on Theoretical Aspects of Rationality and Knowledge. (arXiv:1606.07295v1 [cs.GT])

The 15th Conference on Theoretical Aspects of Rationality and Knowledge (TARK) took place in Carnegie Mellon University, Pittsburgh, USA from June 4 to 6, 2015.

The mission of the TARK conferences is to bring together researchers from a wide variety of fields, including Artificial Intelligence, Cryptography, Distributed Computing, Economics and Game Theory, Linguistics, Philosophy, and Psychology, in order to further our understanding of interdisciplinary issues involving reasoning about rationality and knowledge.

These proceedings consist of a subset of the papers / abstracts presented at the TARK conference.

#### Log-based Evaluation of Label Splits for Process Models. (arXiv:1606.07259v1 [cs.DB])

Process mining techniques aim to extract insights in processes from event logs. One of the challenges in process mining is identifying interesting and meaningful event labels that contribute to a better understanding of the process. Our application area is mining data from smart homes for elderly, where the ultimate goal is to signal deviations from usual behavior and provide timely recommendations in order to extend the period of independent living. Extracting individual process models showing user behavior is an important instrument in achieving this goal. However, the interpretation of sensor data at an appropriate abstraction level is not straightforward. For example, a motion sensor in a bedroom can be triggered by tossing and turning in bed or by getting up. We try to derive the actual activity depending on the context (time, previous events, etc.). In this paper we introduce the notion of label refinements, which links more abstract event descriptions with their more refined counterparts. We present a statistical evaluation method to determine the usefulness of a label refinement for a given event log from a process perspective. Based on data from smart homes, we show how our statistical evaluation method for label refinements can be used in practice. Our method was able to select two label refinements out of a set of candidate label refinements that both had a positive effect on model precision.

#### Parallel Scheduling Algorithm based on Complex Coloring for Input-Queued Switches. (arXiv:1606.07226v1 [cs.NI])

This paper explores the application of a new algebraic method of edge coloring, called complex coloring, to the scheduling problems of input queued switches. The proposed distributed parallel scheduling algorithm possesses two important features: optimality and rearrangeability. Optimality ensures that the algorithm always returns a proper coloring with the minimum number of required colors, and rearrangeability allows partially re-coloring the existing connection patterns if the underlying graph only changes slightly. The running time of the proposed scheduling algorithm is on the order of $O(\log^2 N)$ per frame, and the amortized time complexity, the time to compute a matching per timeslot, is only $O(\log N)$. The scheduling algorithm is highly robust in the face of traffic fluctuations. Since the higher the variable density, the higher the efficiency of the variable elimination process, complex coloring provides a natural adaptive solution to non-uniform input traffic patterns. The proposed scheduling algorithm for packet switching can achieve nearly 100% throughput.

#### Energy Saving of Base Stations Sleep Scheduling for Multi-Hop Vehicular Networks. (arXiv:1606.07206v1 [cs.NI])

This paper investigates the energy saving of base station (BS) deployed in a 1-D multi-hop vehicular network with sleep scheduling strategy. We consider cooperative BS scheduling strategy where BSs can switch between sleep and active modes to reduce the average energy consumption utilizing the information of vehicular speeds and locations. Assuming a Poisson distribution of vehicles, we derive an appropriate probability distribution function of distance between two adjacent cluster heads, where a cluster is a maximal set of vehicles in which every two adjacent vehicles can communicate directly when their Euclidean distance is less than or equal to a threshold, known as the communication range of vehicles. Furthermore, the expected value of the sojourn time in the sleep mode and energy saving are obtained. The numerical results show that the sleep scheduling strategy significantly reduces the energy consumption of the base stations.

#### On Improving Capacity of Full-Duplex Small Cells with D2D. (arXiv:1606.07198v1 [cs.NI])

The recent developments in full duplex (FD) communication promise doubling the capacity of cellular networks using self interference cancellation (SIC) techniques. FD small cells with device-to-device (D2D) communication links could achieve the expected capacity of the future cellular networks (5G). In this work, we consider joint scheduling and dynamic power algorithm (DPA) for a single cell FD small cell network with D2D links (D2DLs). We formulate the optimal user selection and power control as a non-linear programming (NLP) optimization problem to get the optimal user scheduling and transmission power in a given TTI. Our numerical results show that using DPA gives better overall throughput performance than the full power transmission algorithm (FPA). Also, simultaneous transmissions (combinations of uplink (UL), downlink (DL), and D2D) occur 80% of the time, thereby increasing the spectral efficiency and network capacity.

#### On Energy Efficiency of the Nearest-Neighbor Cooperative Communication in Heterogeneous Networks. (arXiv:1606.07197v1 [cs.NI])

In this paper, we consider a two-dimensional heterogeneous cellular network scenario consisting of one base station (BS) and some mobile stations (MSs) whose locations follow a Poisson point process (PPP). The MSs are equipped with multiple radio access interfaces including a cellular access interface and at least one short-range communication interface. We propose a nearest-neighbor cooperation communication (NNCC) scheme by exploiting the short-range communication between a MS and its nearest neighbor to collaborate on their uplink transmissions. In the proposed cooperation scheme, a MS and its nearest neighbor first exchange data by the short-range communication. Upon successful decoding of the data from each other, they proceed to send their own data, as well as the data received from the other to the BS respectively in orthogonal time slots. The energy efficiency analysis for the proposed scheme is presented based on the characteristics of the PPP and the Rayleigh fading channel. Numerical results show that the NNCC scheme significantly improves the energy efficiency compared to the conventional non-cooperative uplink transmissions.

#### Mobile Converged Networks: Framework, Optimization and Challenges. (arXiv:1606.07164v1 [cs.NI])

In this paper, a new framework of mobile converged networks is proposed for flexible resource optimization over multi-tier wireless heterogeneous networks. Design principles and advantages of this new framework of mobile converged networks are discussed. Moreover, mobile converged network models based on interference coordination and energy efficiency are presented and the corresponding optimization algorithms are developed. Furthermore, future challenges of mobile converged networks are identified to promote the study in modeling and performance analysis of mobile converged networks.

#### Characterizing graphs of maximum matching width at most 2. (arXiv:1606.07157v1 [math.CO])

The maximum matching width is a width-parameter that is defined on a branch-decomposition over the vertex set of a graph. The size of a maximum matching in the bipartite graph is used as a cut-function. In this paper, we characterize the graphs of maximum matching width at most 2 using the minor obstruction set. Also, we compute the exact value of the maximum matching width of a grid.

#### Adaptive and Scalable Android Malware Detection through Online Learning. (arXiv:1606.07150v1 [cs.CR])

It is well-known that malware constantly evolves so as to evade detection and this causes the entire malware population to be non-stationary. Contrary to this fact, prior works on machine learning based Android malware detection have assumed that the distribution of the observed malware characteristics (i.e., features) do not change over time. In this work, we address the problem of malware population drift and propose a novel online machine learning based framework, named DroidOL to handle it and effectively detect malware. In order to perform accurate detection, security-sensitive behaviors are captured from apps in the form of inter-procedural control-flow sub-graph features using a state-of-the-art graph kernel. In order to perform scalable detection and to adapt to the drift and evolution in malware population, an online passive-aggressive classifier is used.

In a large-scale comparative analysis with more than 87,000 apps, DroidOL achieves 84.29% accuracy outperforming two state-of-the-art malware techniques by more than 20% in their typical batch learning setting and more than 3% when they are continuously re-trained. Our experimental findings strongly indicate that online learning based approaches are highly suitable for real-world malware detection.

#### Real-Time Synthesis is Hard!. (arXiv:1606.07124v1 [cs.LO])

We study the reactive synthesis problem (RS) for specifications given in Metric Interval Temporal Logic (MITL). RS is known to be undecidable in a very general setting, but on infinite words only; and only the very restrictive BRRS subcase is known to be decidable (see D'Souza et al. and Bouyer et al.). In this paper, we precise the decidability border of MITL synthesis. We show RS is undecidable on finite words too, and present a landscape of restrictions (both on the logic and on the possible controllers) that are still undecidable. On the positive side, we revisit BRRS and introduce an efficient on-the-fly algorithm to solve it.

#### Detecting service provider alliances on the choreography enactment pricing game. (arXiv:1606.07111v1 [cs.DC])

We present the choreography enactment pricing game, a cooperative game-theoretic model for the study of scheduling of jobs using competitor service providers. A choreography (a peer-to-peer service composition model) needs a set of services to fulfill its jobs requirements. Users must choose, for each requirement, which service providers will be used to enact the choreography at lowest cost. Due to the lack of centralization, vendors can form alliances to control the market. We show a novel algorithm capable of detecting alliances among service providers, based on our study of the bargaining set of this game.

#### Traffic dynamics on dynamical networks: The connection between network lifetime and traffic congestion. (arXiv:1606.07099v1 [cs.NI])

For many power-limited networks, such as wireless sensor networks and mobile ad hoc networks, maximizing the network lifetime is the first concern in the related designing and maintaining activities. We study the network lifetime from the perspective of network science. In our dynamic network, nodes are assigned a fixed amount of energy initially and consume the energy in the delivery of packets. We divided the network traffic flow into four states: no, slow, fast, and absolute congestion states. We derive the network lifetime by considering the state of the traffic flow. We find that the network lifetime is generally opposite to traffic congestion in that the more congested traffic, the less network lifetime. We also find the impacts of factors such as packet generation rate, communication radius, node moving speed, etc., on network lifetime and traffic congestion.

#### Finding Proofs in Tarskian Geometry. (arXiv:1606.07095v1 [cs.AI])

We report on a project to use a theorem prover to find proofs of the theorems in Tarskian geometry. These theorems start with fundamental properties of betweenness, proceed through the derivations of several famous theorems due to Gupta and end with the derivation from Tarski's axioms of Hilbert's 1899 axioms for geometry. They include the four challenge problems left unsolved by Quaife, who two decades ago found some \Otter proofs in Tarskian geometry (solving challenges issued in Wos's 1998 book). There are 212 theorems in this collection. We were able to find \Otter proofs of all these theorems. We developed a methodology for the automated preparation and checking of the input files for those theorems, to ensure that no human error has corrupted the formal development of an entire theory as embodied in two hundred input files and proofs. We distinguish between proofs that were found completely mechanically (without reference to the steps of a book proof) and proofs that were constructed by some technique that involved a human knowing the steps of a book proof. Proofs of length 40--100, roughly speaking, are difficult exercises for a human, and proofs of 100-250 steps belong in a Ph.D. thesis or publication. 29 of the proofs in our collection are longer than 40 steps, and ten are longer than 90 steps. We were able to derive completely mechanically all but 26 of the 183 theorems that have "short" proofs (40 or fewer deduction steps). We found proofs of the rest, as well as the 29 "hard" theorems, using a method that requires consulting the book proof at the outset. Our "subformula strategy" enabled us to prove four of the 29 hard theorems completely mechanically. These are Ph.D. level proofs, of length up to 108.

#### From NoSQL Accumulo to NewSQL Graphulo: Design and Utility of Graph Algorithms inside a BigTable Database. (arXiv:1606.07085v1 [cs.DB])

Google BigTable's scale-out design for distributed key-value storage inspired a generation of NoSQL databases. Recently the NewSQL paradigm emerged in response to analytic workloads that demand distributed computation local to data storage. Many such analytics take the form of graph algorithms, a trend that motivated the GraphBLAS initiative to standardize a set of matrix math kernels for building graph algorithms. In this article we show how it is possible to implement the GraphBLAS kernels in a BigTable database by presenting the design of Graphulo, a library for executing graph algorithms inside the Apache Accumulo database. We detail the Graphulo implementation of two graph algorithms and conduct experiments comparing their performance to two main-memory matrix math systems. Our results shed insight into the conditions that determine when executing a graph algorithm is faster inside a database versus an external system---in short, that memory requirements and relative I/O are critical factors.

#### Domain Name System Security and Privacy: Old Problems and New Challenges. (arXiv:1606.07080v1 [cs.CR])

The domain name system (DNS) is an important protocol in today's Internet operation, and is the standard naming convention between domain names, names that are easy to read, understand, and remember by humans, to IP address of Internet resources. The wealth of research activities on DNS in general and security and privacy in particular suggest that all problems in this domain are solved. Reality however is that despite the large body of literature on various aspects of DNS, there are still many challenges that need to be addressed. In this paper, we review the various activities in the research community on DNS operation, security, and privacy, and outline various challenges and open research directions that need to be tackled.

#### SICS: Secure In-Cloud Service Function Chaining. (arXiv:1606.07079v1 [cs.NI])

There is an increasing trend that enterprises outsource their network functions to the cloud for lower cost and ease of management. However, network function outsourcing brings threats to the privacy of enterprises since the cloud is able to access the traffic and rules of in-cloud network functions. Current tools for secure network function outsourcing either incur large performance overhead or do not support real-time updates. In this paper, we present SICS, a secure service function chain outsourcing framework. SICS encrypts each packet header and use a label for in-cloud rule matching, which enables the cloud to perform its functionalities correctly with minimum header information leakage. Evaluation results show that SICS achieves higher throughput, faster construction and update speed, and lower resource overhead at both enterprise and cloud sides, compared to existing solutions.

### QuantOverflow

#### Correlation of a lognormal asset and a normal asset

So if i want to calcualte the correlation between a pair of assets, my intuition is that i should calculate whatever correlation i plan on using;

When we look at correlation, it's normally the correlation of the log returns - which makes sense from a MC standpoint, since it's the correlated random numbers that create the returns.

If i want to simulate a set of paths of the pair of assets, and one will be simulated using a lognormal returns process and the other a random normal walk (absolute), then i should convert each time series into just the random number sequences which, when put through the process i will use, will recreate them - and then take the correlation of these numbers (assuming i'm using only historical correlation)?

i.e. work out the correlation of the underlying random sequences.

Is this correct?

### Planet Theory

#### Advanced Probabilistic Couplings for Differential Privacy

Authors: Gilles Barthe, Marco Gaboardi, Benjamin Grégoire, Justin Hsu, Pierre-Yves Strub
Abstract: Differential privacy is a promising formal approach to data privacy, which provides a quantitative bound on the privacy cost of an algorithm that operates on sensitive information. Several tools have been developed for the formal verification of differentially private algorithms, including program logics and type systems. However, these tools do not capture fundamental techniques that have emerged in recent years, and cannot be used for reasoning about cutting-edge differentially private algorithms. Existing techniques fail to handle three broad classes of algorithms: 1) algorithms where privacy depends on accuracy guarantees, 2) algorithms that are analyzed with the advanced composition theorem, which shows slower growth in the privacy cost, 3) algorithms that interactively accept adaptive inputs.

We address these limitations with a new formalism extending apRHL, a relational program logic that has been used for proving differential privacy of non-interactive algorithms, and incorporating aHL, a (non-relational) program logic for accuracy properties. We illustrate our approach through a single running example, which exemplifies the three classes of algorithms and explores new variants of the Sparse Vector technique, a well-studied algorithm from the privacy literature. We implement our logic in EasyCrypt, and formally verify privacy. We also introduce a novel coupling technique called \emph{optimal subset coupling} that may be of independent interest.

#### Robust Learning of Fixed-Structure Bayesian Networks

Authors: Ilias Diakonikolas, Daniel Kane, Alistair Stewart
Abstract: We investigate the problem of learning Bayesian networks in an agnostic model where an $\epsilon$-fraction of the samples are adversarially corrupted. Our agnostic learning model is similar to -- in fact, stronger than -- Huber's contamination model in robust statistics. In this work, we study the fully observable Bernoulli case where the structure of the network is given. Even in this basic setting, previous learning algorithms either run in exponential time or lose dimension-dependent factors in their error guarantees. We provide the first computationally efficient agnostic learning algorithm for this problem with dimension-independent error guarantees. Our algorithm has polynomial sample complexity, runs in polynomial time, and achieves error that scales nearly-linearly with the fraction of adversarially corrupted samples.

### TheoryOverflow

#### Complexity of "destroying" the graph's minimum spanning tree weight

Assume we have a connected input graph $G=(V,E)$ and a weight function $w:E\to\mathbb N$. Denote by $w(G)$ the weight of a minimum spanning tree for a graph $G$. For this purpose, define $w(G')$ as $\infty$ for a graph $G'$ which is not connected.

Consider the following problem:

Given an integer $k\in\mathbb N$, decide whether there exists an edge set $E'\subseteq E$ , such that $|E'|=k$ and $w((V,E\setminus E')) > w(G)$?

What is the complexity of the above problem?

### Planet Theory

#### Optimal Evacuation Flows on Dynamic Paths with General Edge Capacities

Authors: Guru Prakash Arumugam, John Augustine, Mordecai J. Golin, Yuya Higashikawa, Naoki Katoh, Prashanth Srikanthan
Abstract: A Dynamic Graph Network is a graph in which each edge has an associated travel time and a capacity (width) that limits the number of items that can travel in parallel along that edge. Each vertex in this dynamic graph network begins with the number of items that must be evacuated into designated sink vertices. A $k$-sink evacuation protocol finds the location of $k$ sinks and associated evacuation movement protocol that allows evacuating all the items to a sink in minimum time. The associated evacuation movement must impose a confluent flow, i.e, all items passing through a particular vertex exit that vertex using the same edge. In this paper we address the $k$-sink evacuation problem on a dynamic path network. We provide solutions that run in $O(n \log n)$ time for $k=1$ and $O(k n \log^2 n)$ for $k >1$ and work for arbitrary edge capacities.

#### Generalized Preconditioning and Network Flow Problems

Authors: Jonah Sherman
Abstract: We consider approximation algorithms for the problem of finding $x$ of minimal norm $\|x\|$ satisfying a linear system $\A x = \b$, where the norm $\|\cdot \|$ is arbitrary and generally non-Euclidean. We show a simple general technique for composing solvers, converting iterative solvers with residual error $\|\A x - \b\| \leq t^{-\Omega(1)}$ into solvers with residual error $\exp(-\Omega(t))$, at the cost of an increase in $\|x\|$, by recursively invoking the solver on the residual problem $\tilde{\b} = \b - \A x$. Convergence of the composed solvers depends strongly on a generalization of the classical condition number to general norms, reducing the task of designing algorithms for many such problems to that of designing a \emph{generalized preconditioner} for $\A$. The new ideas significantly generalize those introduced by the author's earlier work on maximum flow, making them more widely applicable.

As an application of the new technique, we present a nearly-linear time approximation algorithm for uncapacitated minimum-cost flow on undirected graphs. Given an undirected graph with $m$ edges labelled with costs, and $n$ vertices labelled with demands, the algorithm takes $\epsilon^{-2}m^{1+o(1)}$-time and outputs a flow routing the demands with total cost at most $(1+\epsilon)$ times larger than minimal, along with a dual solution proving near-optimality. The generalized preconditioner is obtained by embedding the cost metric into $\ell_1$, and then considering a simple hierarchical routing scheme in $\ell_1$ where demands initially supported on a dense lattice are pulled from a sparser lattice by randomly rounding unaligned coordinates to their aligned neighbors. Analysis of the generalized condition number for the corresponding preconditioner follows that of the classical multigrid algorithm for lattice Laplacian systems.

### CompsciOverflow

#### What is this approximation/error-reduction method called?

I'm wondering if anyone could help me find my footing in an approach I am taking with a student in my audio programming class for creating more accurate pitch detection algorithms. But the approach is not limited to pitch detection and in fact seems similar to Newton's Method, Euler's Method, Horner's Method, and so on. It is a very simple and general idea, and must have some background in numerical methods. I am looking for pointers to the literature.

Here is the idea. We have a function f which takes a signal and returns the fundamental frequency (such algorithms are close cousins to the Discrete Fourier Transform). In order to test its accuracy, I created simple sine wave signals of precise frequencies and tested the algorithm, and graphed the errors over a particular range; basically a perfect f would be the identity function, so we just had to record the deviation from the identity. The errors are basically sinusoidal. So I stored the errors in an array, and use cubic interpolation to create a continuous error function, and built that into the last stage of the algorithm. Of course, there is a problem, because the errors showed the deviation from a perfect f, and the original f is not perfect, so there would be errors in the errors, so to speak. So I iterated the process, correcting successively for errors in the errors, and the algorithm gets better each time. I have not yet figured out whether it will converge to some minimal error. I also have not tested it in musical settings. But it is very promising, and seems like a generally useful technique.

Separate from a programming trick, I would like to understand some of its properties such as convergence and so on. Anyone have any pointer, keywords, etc. for me to pursue this? I'm guessing it is a standard technique in numerical methods.

### Wes Felter

#### "By and large, considerations of geopolitics/realpolitik are missing from the debate on Internet..."

“By and large, considerations of geopolitics/realpolitik are missing from the debate on Internet governance”

- The Register

### CompsciOverflow

#### Grammar for concrete syntax

It is given concrete syntax:
data Stmt = SExp Exp | String := Exp | SBlock [Stmt]
data Exp = EInt Integer | EVar String | Exp + Exp

I am going to find a concrete syntax (CFG) for these data types. Could you help me to do it? I have never done it before, which is why I am asking for help.

### StackOverflow

#### Implementing compose using only Function<> in java 8

In the book Functional Programming in Java the author builds a compose function using only the Function<T, U> interface (the interface however is not one shipped with Java 8 but very similar though) the snippet of which is show below

// Minimal single-method functional interface used in "Functional Programming
// in Java": a function mapping an argument of type T to a result of type U
// (equivalent in spirit to java.util.function.Function<T, U>, but defined by
// the book before introducing the standard library type).
public interface Function<T, U> {
U apply(T arg);
}

though I could understand the method version of compose below, which takes in 2 functions and return a composed function

public static final Function<Integer, Integer> compose (final Function<Integer, Integer> f1,
final Function<Integer, Integer> f2) {
arg -> f1.apply(f2.apply(arg));
}

I just couldn't get my head around the below compose implementation with Function<> and lambdas

static Function<Function<Integer, Integer>,
Function<Function<Integer, Integer>,
Function<Integer, Integer>>> compose =
x -> y -> z -> x.apply(y.apply(z));

PS: couldn't get my mind out of this and move forward with remaining sections :(

### CompsciOverflow

#### How to choose between UC and PL when using the DPLL algorithm?

We know DPLL algorithm is backtracking + unit propagation + pure literal rule.

I have an example. There is one example to solve following Satisfiability problem with DPLL. if assign of "0" to variables is prior to assign "1" to variables, Which of Unit Clause (UC) or Pure Literal (PL) is used to solve this specific example?

$\{\lnot A \lor B \lor C\}, \{A \lor \lnot B \lor C\}, \{A \lor B \lor \lnot C\}, \{A \lor B \lor C\}$

Olympiad Solution is: PL and UC.

Our Solution is just UC.

Who can satisfy me why Olympiad solution is correct ?!

Unit propagation is not possible as there are no unit clauses.

Pure literal rule is not applicable as there is no literals that occur only positively or only negatively.

Update: I think in node (3) we can use PL or UC. Isn't that so?

### StackOverflow

#### Revertible Dimension Reduction for Many Dimensions

Is there a list of dimension reduction techniques which are revertible (I can go back and forth between sub manifold and original space) and scale well to large (>1million dimensions)? The vector is very sparse. Ideally I could find a rough subspace of interest on a small number of examples (reduce to ~10k dimensions), and then train something like an autoencoder on the subspace for fine control.

• Going straight for an auto-encoders is probably out of the picture as a FC layers with >1M input size becomes very difficult memory wise.
• PCA is an option, but has a huge memory footprint.
• Random Projection doesn't seem to have a way to revert back to the original space?

Thanks!

### TheoryOverflow

#### A "pumping lemma" for linear indexed languages

I have a language which I know is an indexed language. I suspect it is not a linear indexed language, but I do not know how to show that it isn't. On the Wikipedia page linked above for indexed languages there are references for a "pumping lemma" and "shrinking lemma" for indexed languages, but nothing for linear indexed languages. I am wondering if there are any known techniques for showing a language in not a linear indexed language.

### Fefe

#### Ein US-Bundesgericht in Virginia (wo die ganzen Geheimdienste ...

Ein US-Bundesgericht in Virginia (wo die ganzen Geheimdienste sitzen) hat entschieden, dass es keine "reasonable expectation of privacy" beim Heim-PC zuhause gibt.

Laut Rechtsdoktrin in den USA ist das der Test dafür, ob die "Bedarfsträger" für den Zugriff einen Durchsuchungsbefehl brauchen. Brauchen sie nach dieser Entscheidung nicht.

### HN Daily

#### Daily Hacker News for 2016-06-23

The 10 highest-rated articles on Hacker News on June 23, 2016 which have not appeared on any previous Hacker News Daily are:

## June 23, 2016

### infra-talk

#### Auto-renewal of a Let’s Encrypt certificate

I configured this blog to use a free, automatically-issued Let's Encrypt SSL certificate around 6 months ago.

The command to issue the cert is as follows:

letsencrypt-auto certonly
-a webroot
--webroot-path /var/www/sites/blog.yo61.com/html/
-d blog.yo61.com
--agree-tos
--email robin.bowes@example.com

To check if an existing certificate will expire within the next 28 days, use this command:

openssl x509
-checkend 2419200
-noout
-inform pem
-in /etc/letsencrypt/live/blog.yo61.com/cert.pem

Put these together, and run from a daily cron job (remembering to restart your web server after changing the certificate) and your cert will automatically renew 28 days before it expires.

openssl x509
-checkend 2419200
-noout
-inform pem
-in /etc/letsencrypt/live/blog.yo61.com/cert.pem ||
letsencrypt-auto certonly
-a webroot
--webroot-path /var/www/sites/blog.yo61.com/html/
-d blog.yo61.com
--agree-tos
--email robin.bowes@example.com &&
systemctl restart httpd

### DragonFly BSD Digest

#### BSDNow 147: Release all the things!

BSDNow 147 is available, with an interview of Glen Barber and Peter Wemm.  They’re talking about release engineering, as you may have guessed from the title.

### StackOverflow

#### What is the purpose of weights and biases in tensorflow word2vec example?

I'm trying to understand how the word2vec example works and don't really understand what the purpose is of the weights and biases passed into the nce_loss function. There are two variable inputs into the function: weights (plus biases) and embedding.

# Look up embeddings for inputs.
embeddings = tf.Variable(
tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
embed = tf.nn.embedding_lookup(embeddings, train_inputs)

# Construct the variables for the NCE loss
nce_weights = tf.Variable(
tf.truncated_normal([vocabulary_size, embedding_size],
stddev=1.0 / math.sqrt(embedding_size)))
nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

Both are randomly initialized and (as far as I understand) both are subject to updates during learning.

# Compute the average NCE loss for the batch.
loss = tf.reduce_mean(
tf.nn.nce_loss(nce_weights, nce_biases, embed, train_labels,
num_sampled, vocabulary_size))

I suppose both of them should represent trained model. However weights and biases are never used later on for similarity calculations. Instead, only one component is used:

# Compute the cosine similarity between minibatch examples and all embeddings.
norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
normalized_embeddings = embeddings / norm
valid_embeddings = tf.nn.embedding_lookup(
normalized_embeddings, valid_dataset)
similarity = tf.matmul(
valid_embeddings, normalized_embeddings, transpose_b=True)

So what about the second component of the model? Why are the weights and biases being ignored?

Thank you.

### CompsciOverflow

#### Trouble extracting only related text from the pdf file? [on hold]

The use of Beautiful Soup is not needed here. I have created pdf file and now trying to use pdfminer to extract only text. The problem is how to remove additional text which is the part of advertisements.

### TheoryOverflow

#### How to interpret these adiabatic evolutions?

I was trying to study the adiabatic Hamiltonian defined in the paper (arXiv:1207.1712) titled 'Solving the Graph Isomorphism Problem with a Quantum Annealer'.

My case is the cycle graph $C_n$ when $n$ is even.

Following are the eigenvalues of the ground state and the first excited state plotted for s = 0 to 1 for n = 4, 6, 8, 10, 16, 128.

C_4:

C_6:

C_8:

C_10:

C_16:

C_128:

None of these plots show that the levels are crossing but at the same time they are either not continuous or touching.

How should I interpret them?

### CompsciOverflow

#### Trouble proving Big Theta Statement [duplicate]

I am browsing through MIT CS exams. For the task

State whether each of the following claims is True or False and prove your answer.

I found the following statement: n! is Θ (n^n)

Seems easy to me but I have trouble proving it. So I've written down the inequality

c1*n^n <= n! <= c2*n^n

My feeling: is that this is wrong due to the first inequality (c1*n^n <= n!), as n^n is bigger than n! and because it is growing dependent on n we can't find a positive constant c1 to make it less than n! (towards infinity). How can I write this down OR am I wrong in the first place?

I think too complicated.

### Lobsters

#### Monkit: Tracing and metrics library for Go

This package is somewhat of a combination of a distributed tracing library (with built-in graph generation), and something more like @codahale’s metrics library. Integrations are driven by plugins, but we have plugins for Graphite and Zipkin already. It’s been very useful to us. Let me know what you think!

### CompsciOverflow

#### Why doesn't subset sum solution violate Exponential Time Hypothesis?

The quickest algorithm for solving subset sum currently is $2^{n/2}$ (via Wiki). Why doesn't this violate the Exponential Time Hypothesis which states that “there is no family of algorithms that can solve 3-SAT in $2^{o(n)}$ time.”

Couldn't a 3-SAT problem be translated to a subset sum problem in polynomial time and then solved in $2^{n/2}$ time. What am I missing here?

### QuantOverflow

Are there any free c++ libraries that would have some of the functions that would be used in developing a trading strategy. For instance, calculating drawdown, Volatility Forecasting, MAE, MFE....etc.

I know I could code these but this would help me save some time and focus on the strategy and not the report generations.

### CompsciOverflow

#### Shorten Length Reduction

I've stumbled upon this Question:

• We say that a reduction $f$ of a language $A$ to a language $B$ is a Shorten length reduction, if there exists a number $n\in N$ s.t for every $w\in A$, s.t if $|w| \geq n \ than \ |f(w)| < |w|$. Also, $A,B \notin \{\Sigma^*,\emptyset \}$. Prove there is such a reduction which is also polynomial-time reduction from any languages $A,B \in P$.
• This was my approach: Take $w'$ to be the shortest word in $B$ and define $f(w) = w'$ for every $w \in A$. I'm pretty sure the idea is correct because we simply choose $n = |w'| + 1$ and then obviously for every $w \ s.t \ |w| >= n \ we \ get \ |w| > n-1 = |w'| = |f(w)|$

So my question is, can I really choose a word $w'$ to be the shortest word in $B$?

Also, am I correct with my approach?

Thanks!

#### Repeated point in polygon: preprocessing complexity given logarithmic query time?

I am interested in the repeated point in polygon problem, where one is given a polygon in a preprocessing phase and in the online phase, one is asked whether a point is in that polygon. The polygon is not necessarily simple. I am interested in the case where a polygon has many edges.

Let $n$ be the number of vertices of the polygon. I have an algorithm that performs the online phase in $O(\log n)$ time but the preprocessing phase requires $O(n^3)$ time and space in general, $O(n^2)$ time and space for simple polygons, and $O(n)$ time for star-shaped polygons.

Can we do better? Specifically, does anyone know lower or upper bounds, constructive or not, for the preprocessing phase given $O(\log n)$ query time?

I'm working in two dimensions, and the points have floating point coordinates (i.e., not integer coordinates).

My algorithm is similar to a standard algorithm and has similar performance characteristics. I discovered it independently so it differs in some immaterial ways but it's not original. It works as follows:

Preprocessing:

1. Translate polygon into first quadrant. (This is to simplify later calculations but isn't strictly necessary.)
2. Create a ray passing from the origin through every endpoint and intersection point of an edge. In each resulting cone no edge crosses another.
3. For each cone, sort the edges crossing that cone by their distance from the origin.

Online:

1. Apply the same translation to the query point as was applied to the polygon.
2. Using binary search, find the cone containing the point, or if none, return false.
3. Using binary search, find the number of edges in the point's cone for which the point and origin are on the same side. Return whether this number is odd.

### StackOverflow

#### Calculating RMS Error with Monte Carlo Markov Chain

I'm trying to find the optimal delta in markov chain for calculating the volume of a torus. The program finds the volume of the torus in both ways - Markov Chain and Monte Carlo, multiple times. I intend to use the R.M.S error to find which delta is the best(the one that has the smallest RMS error is the most accurate). Anyone has an idea? here's my code:

from random import uniform
def Monte_Carlo(R, r, N):
    """Estimate the volume of a torus by direct Monte Carlo rejection sampling.

    Samples N points uniformly in the first-octant cube [0, R+r]^3 and counts
    those inside the torus (R - sqrt(x^2 + y^2))^2 + z^2 <= r^2, then scales
    by the full bounding-cube volume 8*(R+r)^3 (symmetry across octants).

    Parameters:
        R: distance from the torus centre to the centre of the tube.
        r: radius of the tube.
        N: number of random samples (must be > 0).

    Returns:
        Volume estimate as a float.
    """
    count = 0
    for _ in range(N):
        x = uniform(0, R + r)
        y = uniform(0, R + r)
        z = uniform(0, R + r)
        # Hit test: point lies inside the torus.
        if (R - (x ** 2 + y ** 2) ** 0.5) ** 2 + z ** 2 <= r ** 2:
            count += 1
    # float(N) guards against Python-2 integer truncation when R, r and N
    # are all ints — consistent with torus_marakov, which already does this.
    return count * 8 * (R + r) ** 3 / float(N)

import random
def torus_marakov(delta, n_trials, n_hits, R, r):
    """Estimate the torus volume with a Metropolis (Markov-chain) random walk.

    Starting from (1, 1, 1), proposes uniform steps in [-delta, delta] per
    axis; a step is accepted only if it stays inside the open bounding cube
    (-(R+r), R+r)^3. After every trial (accepted or not) the current point
    is tested against the torus; the hit fraction is scaled by the cube
    volume 8*(R+r)^3.

    Parameters:
        delta: maximum step size per coordinate.
        n_trials: number of Markov-chain steps (must be > 0).
        n_hits: initial hit count (callers pass 0).
        R: distance from the torus centre to the centre of the tube.
        r: radius of the tube.

    Returns:
        Volume estimate as a float.
    """
    x, y, z = 1.0, 1.0, 1.0
    for _ in range(n_trials):
        del_x = random.uniform(-delta, delta)
        del_y = random.uniform(-delta, delta)
        del_z = random.uniform(-delta, delta)
        # Metropolis acceptance: reject moves leaving the bounding cube.
        if abs(x + del_x) < R + r and abs(y + del_y) < R + r and abs(z + del_z) < R + r:
            x, y, z = x + del_x, y + del_y, z + del_z
        # The current state counts every iteration, even on rejection.
        if (R - (x ** 2 + y ** 2) ** 0.5) ** 2 + z ** 2 <= r ** 2:
            n_hits += 1
    return n_hits * 8 * (R + r) ** 3 / float(n_trials)

def markov_multirun(delta, n_runs=10000, n_trials=4000, R=None, r=None):
    """Run torus_marakov n_runs times and collect the volume estimates.

    Parameters:
        delta: maximum step size per coordinate, passed to torus_marakov.
        n_runs: number of independent chains to run.
        n_trials: number of steps per chain.
        R, r: torus radii. The original read these as module globals (which
            are undefined in this file — a NameError as written); they are
            now explicit parameters, falling back to the globals when omitted
            so any caller that did define them module-wide still works.

    Returns:
        List of n_runs float volume estimates.
    """
    if R is None:
        R = globals()["R"]
    if r is None:
        r = globals()["r"]
    return [torus_marakov(delta, n_trials, 0, R, r) for _ in range(n_runs)]

#### Is it ok to only use one epoch?

I'm training a neural network in TensorFlow (using tflearn) on data that I generate. From what I can tell, each epoch we use all of the training data. Since I can control how many examples I have, it seems like it would be best to just generate more training data until one epoch is enough to train the network.

So my question is: Is there any downside to only using one epoch, assuming I have enough training data? Am I correct in assuming that 1 epoch of a million examples is better than 10 epochs of 100,000?

#### What is the suggested practice for storing multiple runs of a summary writer in TensorFlow?

I am learning to use TensorBoard and every time I launch tensorboard I get in my terminal the message:

WARNING:tensorflow:Found more than one graph event per run. Overwriting the graph with the newest event.

I assume it is because I've run the same model multiple times with the same name. I just want to run my model multiple times and be able to inspect what it's doing using TensorFlow. Is just re-running:

tensorboard --logdir=path/to/log-directory

not the usual way to do it? Or what is the suggested practice for this type of work, when I want to run the same model multiple times and explore different learning algorithms, step sizes, initialization, etc.? Is it really necessary to set up a new log directory each time?

### StackOverflow

#### Why does TensorFlow create extra name spaces for my variables in the TensorBoard visualization?

I create variables as follows:

# Input placeholder; D and D1 are assumed to be defined earlier in the
# script (dimensions of the input and of layer 1) — TODO confirm.
x = tf.placeholder(tf.float32, shape=[None, D], name='x-input') # M x D
# Variables Layer1
#std = 1.5*np.pi
std = 0.1
# BUG FIX: `name=` belongs on tf.Variable, not on the initializer op.
# Passing it to tf.truncated_normal / tf.constant names only the initializer
# tensor, so each Variable itself gets an auto-generated name — which is
# exactly what produces the extra "Variable" name scopes in TensorBoard.
W1 = tf.Variable(tf.truncated_normal([D, D1], mean=0.0, stddev=std), name='W1') # (D x D1)
S1 = tf.Variable(tf.constant(100.0, shape=[1]), name='S1') # (1 x 1)
C1 = tf.Variable(tf.truncated_normal([D1, 1], mean=0.0, stddev=0.1), name='C1') # (D1 x 1)

but for some reason tensorflow adds extra variable blocks in my visualization:

Why is it doing this and how do I stop it?

### Fefe

#### Kolumbien und FARC haben einen Waffenstillstand unterzeichnet. ...

Kolumbien und FARC haben einen Waffenstillstand unterzeichnet. FARC hatte vorher einer Entwaffnung zugestimmt.

#### Trumps Politikberater äußert sich zu Trumps Israelpolitik.Trump ...

Trumps Politikberater äußert sich zu Trumps Israelpolitik.

Trump findet die Zweistaatenlösung doof. Israel solle das mal selber entscheiden, wie mit den Palästinensern umzugehen ist.

#### Es hat sich spontan noch eine Option aufgetan, wie ...

Es hat sich spontan noch eine Option aufgetan, wie Trump nicht Präsidentschaftskandidat der Republikaner wird: Weil sich seine Delegierten die Anreise nicht leisten können.
At least seven Trump delegates have resorted to crowdfunding to send themselves to the convention, setting up pages on GoFundMe, a popular fundraising platform, asking for sums ranging from $1,000 to $10,000.
Wie krass ist DAS denn!? Au Mann.

#### Eine ansteckende Krebsart kann sich im Seewasser von ...

Eine ansteckende Krebsart kann sich im Seewasser von Muschel zu Muschel fortpflanzen. Genetisch unterscheidet sich der Krebs vom Wirt. Und der Krebs hüpft zwischen Spezies.

Das ist ja mal Alien-Style Gruselkino vom Feinsten! Weia.

### StackOverflow

#### Spark Random Forest error

This is my first time using Mlib in Spark. I am trying to run a Random Forest

model = RandomForest.trainClassifier(trainingData, numClasses=2, categoricalFeaturesInfo={},
numTrees=3, featureSubsetStrategy="auto",
impurity='gini', maxDepth=4, maxBins=40)

but I get the error

Py4JJavaError                             Traceback (most recent call last)
<ipython-input-49-5a8de04ff14b> in <module>()
4 model = RandomForest.trainClassifier(trainingData, numClasses=2,         categoricalFeaturesInfo={},
5                                      numTrees=2,   featureSubsetStrategy="auto",
----> 6                                      impurity='gini', maxDepth=4,    maxBins=40)

/opt/spark/current/python/pyspark/mllib/tree.py in trainClassifier(cls,data, numClasses, categoricalFeaturesInfo, numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins, seed)
377         return cls._train(data, "classification", numClasses,
378                           categoricalFeaturesInfo, numTrees, featureSubsetStrategy, impurity,
--> 379                           maxDepth, maxBins, seed)
380
381     @classmethod

/opt/spark/current/python/pyspark/mllib/tree.py in _train(cls, data, algo, numClasses, categoricalFeaturesInfo, numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins, seed)
294         model = callMLlibFunc("trainRandomForestModel", data, algo, numClasses,
295                               categoricalFeaturesInfo, numTrees, featureSubsetStrategy, impurity,
--> 296                               maxDepth, maxBins, seed)
297         return RandomForestModel(model)
298

/opt/spark/current/python/pyspark/mllib/common.py in callMLlibFunc(name, *args)
128     sc = SparkContext.getOrCreate()
129     api = getattr(sc._jvm.PythonMLLibAPI(), name)
--> 130     return callJavaFunc(sc, api, *args)
131
132

/opt/spark/current/python/pyspark/mllib/common.py in callJavaFunc(sc, func, *args)
121     """ Call Java Function """
122     args = [_py2java(sc, a) for a in args]
--> 123     return _java2py(sc, func(*args))
124
125

/opt/spark/current/python/lib/py4j-0.9-src.zip/py4j/java_gateway.py in __call__(self, *args)
812         return_value = get_return_value(
--> 813             answer, self.gateway_client, self.target_id, self.name)
814
815         for temp_arg in temp_args:

/opt/spark/current/python/pyspark/sql/utils.py in deco(*a, **kw)
43     def deco(*a, **kw):
44         try:
---> 45             return f(*a, **kw)
46         except py4j.protocol.Py4JJavaError as e:
47             s = e.java_exception.toString()

/opt/spark/current/python/lib/py4j-0.9-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
306                 raise Py4JJavaError(
307                     "An error occurred while calling {0}{1}{2}.\n".
--> 308                     format(target_id, ".", name), value)
309             else:
310                 raise Py4JError(

Py4JJavaError: An error occurred while calling o1123.trainRandomForestModel.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0    in stage 94.0 failed 4 times, most recent failure: Lost task 0.3 in stage 94.0 (TID 680, mapr5-217.jiwiredev.com): java.lang.RuntimeException: No bin was found for continuous feature. This error can occur when given invalid data values (such as NaN). Feature index: 20.  Feature value: 1670.0
at org.apache.spark.mllib.tree.impl.TreePoint$.findBin(TreePoint.scala:131) at org.apache.spark.mllib.tree.impl.TreePoint$.org$apache$spark$mllib$tree$impl$TreePoint$$labeledPointToTreePoint(TreePoint.scala:84) at org.apache.spark.mllib.tree.impl.TreePoint$$anonfun$convertToTreeRDD$2.apply(TreePoint.scala:66)
at org.apache.spark.mllib.tree.impl.TreePoint$$anonfunconvertToTreeRDD2.apply(TreePoint.scala:65) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon11.next(Iterator.scala:328) at org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:283) at org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:171) at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:78) at org.apache.spark.rdd.RDD.iterator(RDD.scala:268) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) at org.apache.spark.scheduler.Task.run(Task.scala:89) at org.apache.spark.executor.ExecutorTaskRunner.run(Executor.scala:214) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutorWorker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.orgapachesparkschedulerDAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431) at org.apache.spark.scheduler.DAGScheduler$$anonfunabortStage1.apply(DAGScheduler.scala:1419) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
at org.apache.spark.scheduler.DAGScheduler$$anonfunhandleTaskSetFailed1.apply(DAGScheduler.scala:799) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.util.EventLoop$$anon1.run(EventLoop.scala:48) at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1845) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1858) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1929) at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:927)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
at org.apache.spark.rdd.RDD.collect(RDD.scala:926)
at org.apache.spark.rdd.PairRDDFunctions$$anonfuncollectAsMap1.apply(PairRDDFunctions.scala:741) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$collectAsMap$1.apply(PairRDDFunctions.scala:740)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
at org.apache.spark.rdd.PairRDDFunctions.collectAsMap(PairRDDFunctions.scala:740)
at org.apache.spark.mllib.tree.DecisionTree$.findBestSplits(DecisionTree.scala:651) at org.apache.spark.mllib.tree.RandomForest.run(RandomForest.scala:233) at org.apache.spark.mllib.tree.RandomForest$.trainClassifier(RandomForest.scala:289)
at org.apache.spark.mllib.api.python.PythonMLLibAPI.trainRandomForestModel(PythonMLLibAPI.scala:751)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:381)
at py4j.Gateway.invoke(Gateway.java:259)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:209)
Caused by: java.lang.RuntimeException: No bin was found for continuous feature. This error can occur when given invalid data values (such as NaN). Feature index: 20.  Feature value: 1670.0
at org.apache.spark.mllib.tree.impl.TreePoint$.findBin(TreePoint.scala:131) at org.apache.spark.mllib.tree.impl.TreePoint$.org$apache$spark$mllib$tree$impl$TreePoint$$labeledPointToTreePoint(TreePoint.scala:84) at org.apache.spark.mllib.tree.impl.TreePoint$$anonfun$convertToTreeRDD$2.apply(TreePoint.scala:66)
at org.apache.spark.mllib.tree.impl.TreePoint$$anonfunconvertToTreeRDD2.apply(TreePoint.scala:65) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:283)
at org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:171)
at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:78)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:268)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
... 1 more

I am feeding it a LabeledPoint. Please let me know if I should post any other code.

Any interpretation would be greatly appreciated

#### How to get hidden layer representation of features i.e distributed representation while using denoising auto encoders in theano

I have a high dimensional dataset which I have to cluster. I want to extract the features from the hidden layer of the denoising auto encoder in Theano. How can I do it ?

#### How to deal with sentiment package in R?

I am working on sentiment analysis of Twitter data using R, with the sentiment package, which has two main functions (classify_polarity and classify_emotion). After using these two functions, how do I get the accuracy of these classifications and a confusion matrix? I also want to know how it assigns labels to the tweets — the labels that were created are not accurate. What is the best way to do this as accurately as possible?

### CompsciOverflow

#### Deadlock free sequence of calling wait() on semaphores

I came across problem which stated following (considering s1,s2,s3,... are numbered semaphores):

• Process X calls wait() on s1,s2,s3, then enters critical section and finally calls signal() on s1,s2,s3
• Process Y calls wait() on s2,s3,s4, then enters critical section and finally calls signal() on s2,s3,s4
• Process Z calls wait() on s3,s4,s1, then enters critical section and finally calls signal() on s3,s4,s1

then below is the deadlock-free order of invoking wait():

X:s2,s1,s3
Y:s2,s3,s4
Z:s1,s3,s4

And most other orders are not deadlock free.

Now I am trying to generalize this order so that I can prepare such deadlock free order for any number of processes and semaphores. However I am unable to generalize this order to any specific pattern. For example if I consider there are four methods dealing with following semaphores:

X:s1,s2,s3,s4
Y:s2,s3,s4,s5
Z:s3,s4,s5,s1
W:s4,s5,s1,s2

Then, with trial and error and by observing the above example, I am able to come up with a 'possibly' deadlock-free order of wait() calls:

X:s3,s2,s1,s4
Y:s3,s2,s4,s5
Z:s3,s4,s5,s1
W:s2,s4,s1,s5

However I am still not able to generalize the pattern.

Also, what if there are 4 processes dealing with 6 semaphores:

X:s1,s2,s3,s4
Y:s2,s3,s4,s5
Z:s3,s4,s5,s6
W:s4,s5,s6,s1

### StackOverflow

#### React: is there a better pattern for higher-order component rendering?

Here's a practical example.

import React from 'react';

constructor(props){
super(props)
}
render() {
}
}

return (<h1 className={${props.type}Heading}>{props.text}</h1>) } Heading.propTypes = { text: React.PropTypes.oneOfType([ React.PropTypes.string, React.PropTypes.element, ]), type: React.PropTypes.oneOf(['page', 'modal', 'sub', 'section']).isRequired, } export default Heading; Now, the actual HTML rendered by each component varies by element and className. <h1 className="pageHeading">{props.text}</h1> <h2 className="modalHeading">{props.text}</h2> etc. with sub and section. Now, between propTypes, elements, and classNames, is there a better way of choosing what to render and keeping them all in sync without using a switch statement? As an alternative, but similar example, here's how I have done this with an Icon class. import React from 'react'; require('./Icon.css'); const editGlyph = <path d="M5 14l-3-3 8-8 3 3zM11 2l2-2 3 3-2 2zM0 16l3-1-2-2" />; const backGlyph = <path d="M2 7.994L8.137 16h4.312L6.31 7.994 12.45 0H8.136" />; const addGlyph = <path d="M9.008 7.132V1H7.104v6.132H1v1.904h6.104v6.132h1.904V9.036h6.104V7.132" />; const requiredGlyph = <path d="M4.79 3.42V1H3.655v2.42l-2.27-.857L1 3.59l2.27.815-1.392 1.95.964.662 1.392-2.055L5.71 7.017l.88-.663-1.414-1.95 2.334-.813-.428-1.027" /> const informationGlyph = <g transform="translate(0 .61)"><path d="M6.857 5.143h-2.57V6h.856v2.57h-.857v.86h3.428v-.86h-.857" /><ellipse cx="6" cy="3.429" rx=".857" ry=".857" /><path d="M6 0C2.687 0 0 2.687 0 6s2.687 6 6 6 6-2.687 6-6-2.687-6-6-6zm0 11.143C3.164 11.143.857 8.836.857 6S3.164.857 6 .857 11.143 3.164 11.143 6 8.836 11.143 6 11.143z" /></g>; let iconGlyph; export default function Icon(props) { switch (props.glyph) { case 'add': iconGlyph = addGlyph; break; case 'back': iconGlyph = backGlyph; break; case 'edit': iconGlyph = editGlyph; break; case 'required': iconGlyph = requiredGlyph; break; case 'i': iconGlyph = informationGlyph; break; default: iconGlyph = null; break; } return ( <svg id="icon" className={[icon${props.className}`]}
viewBox="0 0 16 16"
aria-labelledby="title"
>
<title id={props.title}>{props.title}</title>
{iconGlyph}
</svg>
)
}

Icon.propTypes = {
glyph: React.PropTypes.oneOf(['add', 'back', 'edit', 'i', 'required']).isRequired,
className: React.PropTypes.string,
title: React.PropTypes.string,
}

I don't know what it is, but I can't shake the feeling that there is a more elegant way to handle this. Perhaps a Decorator with some params?